From a4d9ce3ea25357fb45886f756b7c2c90d1f4b3b7 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:28 -0500 Subject: [PATCH 001/184] New translations index.html (French) --- web/pandas/fr/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/fr/index.html diff --git a/web/pandas/fr/index.html b/web/pandas/fr/index.html new file mode 100644 index 000000000..2a080a95b --- /dev/null +++ b/web/pandas/fr/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas est un outil open source d'analyse et de manipulation de données rapide, puissant, flexible et facile à utiliser,
+ construit sur le langage de programmation Python.
+

+

+ Installer pandas maintenant ! +

+
+ + +
+
Avec le soutien de :
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

La liste complète des entreprises qui soutiennent pandas est disponible sur la page sponsors. +

+
+
+ {% if releases %} +

Dernière version : {{ releases[0].name }}

+ + {% endif %} +

Suivez-nous

+
+ + +
+

Livres recommandés

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Versions précédentes

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%Y-%m-%d") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From d19c1db25de4f40bc78eb4b5025e3e1c760f278a Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:29 -0500 Subject: [PATCH 002/184] New translations index.html (Spanish) --- web/pandas/es/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/pandas/es/index.html b/web/pandas/es/index.html index 30ef04dd1..81dc9c20f 100644 --- a/web/pandas/es/index.html +++ b/web/pandas/es/index.html @@ -64,7 +64,7 @@
Con el soporte de:

Última versión: {{ releases[0].name }}

From b5bfc6074f00445759ee0f3627daead38cfd0dcb Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:30 -0500 Subject: [PATCH 003/184] New translations index.html (Arabic) --- web/pandas/ar/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/ar/index.html diff --git a/web/pandas/ar/index.html b/web/pandas/ar/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/ar/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From aa14634c7c0837eebd39ada4c416c3cedf38086e Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:31 -0500 Subject: [PATCH 004/184] New translations index.html (Catalan) --- web/pandas/ca/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/ca/index.html diff --git a/web/pandas/ca/index.html b/web/pandas/ca/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/ca/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From b3e703e7cc7d2b147cf972921cc1e89317b691ef Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:32 -0500 Subject: [PATCH 005/184] New translations index.html (Japanese) --- web/pandas/ja/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/ja/index.html diff --git a/web/pandas/ja/index.html b/web/pandas/ja/index.html new file mode 100644 index 000000000..b013da9c0 --- /dev/null +++ b/web/pandas/ja/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas は、高速、強力で、柔軟性があり、使いやすいオープンソースのデータ分析および操作ツールで、
+ Python プログラミング言語の上に構築されています。 +

+

+ 今すぐ pandas をインストールしましょう! +

+
+ + +
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From 1a9acfcc5f163a6d9818237e056d31cdbde55e7c Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:33 -0500 Subject: [PATCH 006/184] New translations index.html (Korean) --- web/pandas/ko/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/ko/index.html diff --git a/web/pandas/ko/index.html b/web/pandas/ko/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/ko/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From c572d58ee67ea470291d0de94b2ab8c9f62a1ce2 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:34 -0500 Subject: [PATCH 007/184] New translations index.html (Polish) --- web/pandas/pl/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/pl/index.html diff --git a/web/pandas/pl/index.html b/web/pandas/pl/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/pl/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From 5805d65c6676091d0e6588135c6431d41baa7d99 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:35 -0500 Subject: [PATCH 008/184] New translations index.html (Russian) --- web/pandas/ru/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/ru/index.html diff --git a/web/pandas/ru/index.html b/web/pandas/ru/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/ru/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From 3a6ea6607059defe5025930a9eb6741212581141 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:36 -0500 Subject: [PATCH 009/184] New translations index.html (Chinese Simplified) --- web/pandas/zh/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/zh/index.html diff --git a/web/pandas/zh/index.html b/web/pandas/zh/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/zh/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From 77c45b84ceb061c9a6741ac2f443efba7d3e02de Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:38 -0500 Subject: [PATCH 010/184] New translations index.html (Persian) --- web/pandas/fa/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/fa/index.html diff --git a/web/pandas/fa/index.html b/web/pandas/fa/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/fa/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From 3db358e8d0ba870f465751cda332279722fa51df Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:39 -0500 Subject: [PATCH 011/184] New translations index.html (Tamil) --- web/pandas/ta/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/ta/index.html diff --git a/web/pandas/ta/index.html b/web/pandas/ta/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/ta/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From 5d6c793531ed5468cf01c593d36d80393bb42419 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:40 -0500 Subject: [PATCH 012/184] New translations index.html (Hindi) --- web/pandas/hi/index.html | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 web/pandas/hi/index.html diff --git a/web/pandas/hi/index.html b/web/pandas/hi/index.html new file mode 100644 index 000000000..c520a16b8 --- /dev/null +++ b/web/pandas/hi/index.html @@ -0,0 +1,141 @@ +{% extends "layout.html" %} +{% block body %} +
+
+
+
+

pandas

+

+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language. +

+

+ Install pandas now! +

+
+ +
+
+
Getting started
+ +
+ +
+
Community
+ +
+
+
+
With the support of:
+ {% for row in sponsors.active | batch(6, "") %} +
+ {% for company in row %} +
+ {% if company %} + + {{ company.name }} + + {% endif %} +
+ {% endfor %} +
+ {% endfor %} +

The full list of companies supporting pandas is available on the sponsors page.
+

+
+
+ {% if releases %} +

Latest version: {{ releases[0].name }}

+ + {% endif %} +

Follow us

+
+ + +
+

Recommended books

+

+ + Python for Data Analysis + +

+

+ + Pandas Cookbook, Third Edition + +

+

+ + Effective pandas 2 + +

+ {% if releases[1:5] %} +

Previous versions

+
    + {% for release in releases[1:5] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} + {% if releases[5:] %} +

+ +

+
    + {% for release in releases[5:] %} +
  • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
    + changelog | + docs | + code +
  • + {% endfor %} +
+ {% endif %} +
+
+
+ +{% endblock %} From 981a59d3785a1dcc4497fb08b1528bc7c78f34a6 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:42 -0500 Subject: [PATCH 013/184] New translations contribute.md (French) --- web/pandas/fr/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/fr/contribute.md diff --git a/web/pandas/fr/contribute.md b/web/pandas/fr/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/fr/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From f020d1f6f0c7f684e719ec3ad6c61053a3c5a8fa Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:43 -0500 Subject: [PATCH 014/184] New translations contribute.md (Arabic) --- web/pandas/ar/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/ar/contribute.md diff --git a/web/pandas/ar/contribute.md b/web/pandas/ar/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/ar/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From ab949e2c2699316d60a798c5747146689128fc41 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:44 -0500 Subject: [PATCH 015/184] New translations contribute.md (Catalan) --- web/pandas/ca/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/ca/contribute.md diff --git a/web/pandas/ca/contribute.md b/web/pandas/ca/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/ca/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From 6dfcccf3380ebfbc2cd1f95d6d20521d3b37dee7 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:45 -0500 Subject: [PATCH 016/184] New translations contribute.md (Japanese) --- web/pandas/ja/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/ja/contribute.md diff --git a/web/pandas/ja/contribute.md b/web/pandas/ja/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/ja/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From b8acdbd699b1ed5e7b06f2cccd990efbc719d1be Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:46 -0500 Subject: [PATCH 017/184] New translations contribute.md (Korean) --- web/pandas/ko/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/ko/contribute.md diff --git a/web/pandas/ko/contribute.md b/web/pandas/ko/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/ko/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From 4dcaafc3de9c68cf8af703436d695d76d8e545c4 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:48 -0500 Subject: [PATCH 018/184] New translations contribute.md (Polish) --- web/pandas/pl/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/pl/contribute.md diff --git a/web/pandas/pl/contribute.md b/web/pandas/pl/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/pl/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From 75635c9aa7a6c0ee9c54330de38ce0b2d2d24943 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:49 -0500 Subject: [PATCH 019/184] New translations contribute.md (Russian) --- web/pandas/ru/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/ru/contribute.md diff --git a/web/pandas/ru/contribute.md b/web/pandas/ru/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/ru/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From c89de314486b068e5bc2bff16013d592d085edcd Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:50 -0500 Subject: [PATCH 020/184] New translations contribute.md (Chinese Simplified) --- web/pandas/zh/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/zh/contribute.md diff --git a/web/pandas/zh/contribute.md b/web/pandas/zh/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/zh/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From 6ab23d468075a01dcafc470f73a521d8195a02a4 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:51 -0500 Subject: [PATCH 021/184] New translations contribute.md (Persian) --- web/pandas/fa/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/fa/contribute.md diff --git a/web/pandas/fa/contribute.md b/web/pandas/fa/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/fa/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From 40d70fc8c161f9c3c9b20a1018aa56789048e3de Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:52 -0500 Subject: [PATCH 022/184] New translations contribute.md (Tamil) --- web/pandas/ta/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/ta/contribute.md diff --git a/web/pandas/ta/contribute.md b/web/pandas/ta/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/ta/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From c4dafbe9f2921419e37d586c5d6c4c8b6bd2d0e4 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:54 -0500 Subject: [PATCH 023/184] New translations contribute.md (Hindi) --- web/pandas/hi/contribute.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 web/pandas/hi/contribute.md diff --git a/web/pandas/hi/contribute.md b/web/pandas/hi/contribute.md new file mode 100644 index 000000000..3307ddcfb --- /dev/null +++ b/web/pandas/hi/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page]({{ base_url }}about/team.html), +and about current sponsors in the [sponsors page]({{ base_url }}about/sponsors.html). + +
+
+
+
+ + + + +

Corporate support

+

+ pandas depends on companies and institutions using the software to support its development:
+ hiring people to work on pandas, letting existing employees contribute to the
+ software, or sponsoring pandas with funds, so the project can hire people to
+ progress on the pandas roadmap.
+

+

More information in the sponsors page

+
+
+ + + + +

Individual contributors

+

+ pandas is mostly developed by volunteers. All kinds of contributions are welcome,
+ such as contributions to the code, to the website (including graphical designers),
+ to the documentation (including translators) and others. There are tasks for all
+ levels, including beginners.
+

+

More information in the contributing page

+
+
+ + + + +

Donations

+

+ Individual donations are appreciated, and are used for things like the project
+ infrastructure, travel expenses for our volunteer contributors to attend
+ the in-person sprints, and small grants to develop features.
+

+

Make your donation in the donate page

+
+
+
+
From 3aaf0fe410b7a3005176590f07633f5b7cf99f0f Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:55 -0500 Subject: [PATCH 024/184] New translations getting_started.md (French) --- web/pandas/fr/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/fr/getting_started.md diff --git a/web/pandas/fr/getting_started.md b/web/pandas/fr/getting_started.md new file mode 100644 index 000000000..8ba98924e --- /dev/null +++ b/web/pandas/fr/getting_started.md @@ -0,0 +1,44 @@ +# Prise en main + +## Installation instructions + +Pour installer pandas, veuillez consulter la [page d'installation]({{ base_url}}docs/getting_started/install.html) +de la documentation pandas. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Livres + +Le livre que nous recommandons pour apprendre pandas est [Python for Data Analysis](https://amzn.to/3DyLaJc), +de [Wes McKinney](https://wesmckinney.com/), créateur de pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat Sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From 8d4dc1fa94d6479f6d7ffadb8357eb2f2a7545ec Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:56 -0500 Subject: [PATCH 025/184] New translations getting_started.md (Arabic) --- web/pandas/ar/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/ar/getting_started.md diff --git a/web/pandas/ar/getting_started.md b/web/pandas/ar/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/ar/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From 5fec76f94e8f3f52eb125e948c45d5d219a2a94d Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:06:57 -0500 Subject: [PATCH 026/184] New translations getting_started.md (Catalan) --- web/pandas/ca/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/ca/getting_started.md diff --git a/web/pandas/ca/getting_started.md b/web/pandas/ca/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/ca/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From 344e1599e8f6e3bedadbaaffc016f3e9c3351ee3 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:06:58 -0500
Subject: [PATCH 027/184] New translations getting_started.md (Japanese)

---
 web/pandas/ja/getting_started.md | 40 ++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 web/pandas/ja/getting_started.md

diff --git a/web/pandas/ja/getting_started.md b/web/pandas/ja/getting_started.md
new file mode 100644
index 000000000..4ab1e3d7e
--- /dev/null
+++ b/web/pandas/ja/getting_started.md
@@ -0,0 +1,40 @@
+# はじめに
+
+## Installation instructions
+
+pandasをインストールするには、pandasのドキュメントの[インストールページ]({{ base_url}}docs/getting_started/install.html)を参照してください。
+
+## Tutorials
+
+pandas については、[チュートリアル]({{ base_url }}docs/getting_started/intro_tutorials/) で、JupyterLab については、[JupyterLab のドキュメント](https://jupyterlab.readthedocs.io/en/stable/user/interface.html) で詳しく学ぶことができます。
+
+## 書籍
+
+pandasを学ぶためにおすすめする書籍は、pandasの作者である [Wes McKinney](https://wesmckinney.com/) による [Python for Data Analysis](https://amzn.to/3DyLaJc)です。
+
+
+ Python for Data Analysis
+
+
+## Videos
+
+
+
+## Cheat sheet
+
+[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf)
+
+## Try pandas in your browser (experimental)
+
+You can try pandas in your browser with the following interactive shell
+without needing to install anything on your system.
+
+

+ Try it in your browser +

From 5192f8c6f5bb118e4de9c1359be75c0bc4aa8574 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:00 -0500 Subject: [PATCH 028/184] New translations getting_started.md (Korean) --- web/pandas/ko/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/ko/getting_started.md diff --git a/web/pandas/ko/getting_started.md b/web/pandas/ko/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/ko/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From 5cc81fc0353f9708c61dcd5f2b9f4d58f52cf88b Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:01 -0500 Subject: [PATCH 029/184] New translations getting_started.md (Polish) --- web/pandas/pl/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/pl/getting_started.md diff --git a/web/pandas/pl/getting_started.md b/web/pandas/pl/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/pl/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From 8d84e23181961a5f81ddc8a5b7349b0b90775972 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:02 -0500 Subject: [PATCH 030/184] New translations getting_started.md (Russian) --- web/pandas/ru/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/ru/getting_started.md diff --git a/web/pandas/ru/getting_started.md b/web/pandas/ru/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/ru/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From e7021ae580c600bd0917dc3535a0210d9f5c68f5 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:03 -0500 Subject: [PATCH 031/184] New translations getting_started.md (Chinese Simplified) --- web/pandas/zh/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/zh/getting_started.md diff --git a/web/pandas/zh/getting_started.md b/web/pandas/zh/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/zh/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From 7a459ba8fcae2bcd8244a6b8b1994ffaae4a8969 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:04 -0500 Subject: [PATCH 032/184] New translations getting_started.md (Persian) --- web/pandas/fa/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/fa/getting_started.md diff --git a/web/pandas/fa/getting_started.md b/web/pandas/fa/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/fa/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From cc35741dcad5d596d2185a9b3fe4db5a31c564a9 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:05 -0500 Subject: [PATCH 033/184] New translations getting_started.md (Tamil) --- web/pandas/ta/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/ta/getting_started.md diff --git a/web/pandas/ta/getting_started.md b/web/pandas/ta/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/ta/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From 265730b34ad6ab7f002fcb471d0fbeaa427a40fc Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:06 -0500 Subject: [PATCH 034/184] New translations getting_started.md (Hindi) --- web/pandas/hi/getting_started.md | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 web/pandas/hi/getting_started.md diff --git a/web/pandas/hi/getting_started.md b/web/pandas/hi/getting_started.md new file mode 100644 index 000000000..c556eda57 --- /dev/null +++ b/web/pandas/hi/getting_started.md @@ -0,0 +1,44 @@ +# Getting started + +## Installation instructions + +To install pandas, please reference the [installation page]({{ base_url}}docs/getting_started/install.html) +from the pandas documentation. + +## Tutorials + +You can learn more about pandas in the [tutorials]({{ base_url }}docs/getting_started/intro_tutorials/), +and more about JupyterLab in the +[JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). + +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/3DyLaJc), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) + +## Try pandas in your browser (experimental) + +You can try pandas in your browser with the following interactive shell +without needing to install anything on your system. + +

+ Try it in your browser +

From fa25c47e707e4f746d24fd51510f129d87995d7b Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:08 -0500 Subject: [PATCH 035/184] New translations governance.md (French) --- web/pandas/fr/about/governance.md | 317 ++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 web/pandas/fr/about/governance.md diff --git a/web/pandas/fr/about/governance.md b/web/pandas/fr/about/governance.md new file mode 100644 index 000000000..b37925e89 --- /dev/null +++ b/web/pandas/fr/about/governance.md @@ -0,0 +1,317 @@ +# Project governance + +The official version of this document, along with a list of +individuals and institutions in the roles defined in the governance +section below, is contained in the +[Project governance]({{ base_url }}about/governance.html) +page of the pandas website. + +## The Project + +The pandas Project (The Project) is an open source software project affiliated +with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open +source software for data ingest, data preparation, data analysis, and data +visualization for the Python programming language. The Software developed by +The Project is released under the BSD (or similar) open source license, +developed openly and hosted in public GitHub repositories under the pandas +GitHub organization. Examples of Project Software +include the main pandas code repository and the pandas-stubs library. + +Through its affiliation with NumFOCUS, The Project has the right to receive +tax-deductible donations in the United States of America. + +The Project is developed by a team of distributed developers, called +Contributors. Contributors are individuals who have contributed code, +documentation, designs or other work to one or more Project repositories. +Anyone can be a Contributor. Contributors can be affiliated with any legal +entity or none. Contributors participate in the project by submitting, +reviewing and discussing GitHub Pull Requests and Issues and participating in +open and public Project discussions on GitHub, mailing lists, and +elsewhere. The foundation of Project participation is openness and +transparency. + +Here is a list of the current Contributors to the main pandas repository: + +[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors) + +There are also many other Contributors listed in the logs of other repositories of +the pandas project. + +The Project Community consists of all Contributors and Users of the Project. +Contributors work on behalf of and are responsible to the larger Project +Community and we strive to keep the barrier between Contributors and Users as +low as possible. + +The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation +([https://numfocus.org](https://numfocus.org)), which serves as its fiscal +sponsor, may hold project trademarks and other intellectual property, helps +manage project donations and acts as a parent legal entity. NumFOCUS is the +only legal entity that has a formal relationship with the project (see +Institutional Partners section below). + +## Governance + +This section describes the governance and leadership model of The Project. 
+ +The foundations of Project governance are: + +- Openness & Transparency +- Active Contribution +- Institutional Neutrality + +Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and +subset of Contributors, called the Core Team, whose active and consistent +contributions have been recognized by their receiving “commit rights” to the +Project GitHub repositories. In general all Project decisions are made through +consensus among the Core Team with input from the Community. The BDFL can, but +rarely chooses to, override the Core Team and make a final decision on a +matter. + +While this approach has served us well, as the Project grows and faces more +legal and financial decisions and interacts with other institutions, we see a +need for a more formal governance model. Moving forward The Project leadership +will consist of a BDFL and Core Team. We view this governance model as the +formalization of what we are already doing, rather than a change in direction. + +### BDFL + +The Project will have a BDFL (Benevolent Dictator for Life), who is currently +Wes McKinney. As Dictator, the BDFL has the authority to make all final +decisions for The Project. As Benevolent, the BDFL, in practice chooses to +defer that authority to the consensus of the community discussion channels and +the Core Team. It is expected, and in the past has been the case, that the BDFL +will only rarely assert his/her final authority. Because it is rarely used, we +refer to BDFL’s final authority as a “special” or “overriding” vote. When it +does occur, the BDFL override typically happens in situations where there is a +deadlock in the Core Team or if the Core Team ask the BDFL to make a decision +on a specific matter. To ensure the benevolence of the BDFL, The Project +encourages others to fork the project if they disagree with the overall +direction the BDFL is taking. The BDFL is chair of the Core Team (see below) +and may delegate his/her authority on a particular decision or set of decisions +to any other Core Team Member at his/her discretion. + +The BDFL can appoint his/her successor, but it is expected that the Core Team +would be consulted on this decision. If the BDFL is unable to appoint a +successor (e.g. due to death or illness), the Core Team will choose a successor +by voting with at least 2/3 of the Core Team members voting in favor of the +chosen successor. At least 80% of the Core Team must participate in the +vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core +Team members shall propose the BDFL candidates to the Main NumFOCUS board, who +will then make the final decision. + +### Core Team + +The Project's Core Team will consist of Project Contributors who have produced +contributions that are substantial in quality and quantity, and sustained over +at least one year. The overall role of the Core Team is to ensure, through +working with the BDFL and taking input from the Community, the long-term +well-being of the project, both technically and as a community. + +During the everyday project activities, Core Team participate in all +discussions, code review and other project activities as peers with all other +Contributors and the Community. In these everyday activities, Core Team do not +have any special power or privilege through their membership on the Core +Team. 
However, it is expected that because of the quality and quantity of their +contributions and their expert knowledge of the Project Software that the Core +Team will provide useful guidance, both technical and in terms of project +direction, to potentially less experienced contributors. + +The Core Team and its Members play a special role in certain situations. +In particular, the Core Team may: + +- Make decisions about the overall scope, vision and direction of the + project. +- Make decisions about strategic collaborations with other organizations or + individuals. +- Make decisions about specific technical issues, features, bugs and pull + requests. They are the primary mechanism of guiding the code review process + and merging pull requests. +- Make decisions about the Services that are run by The Project and manage + those Services for the benefit of the Project and Community. +- Make decisions when regular community discussion doesn't produce consensus + on an issue in a reasonable time frame. + +### Core Team membership + +To become eligible for being a Core Team Member an individual must be a Project +Contributor who has produced contributions that are substantial in quality and +quantity, and sustained over at least one year. Potential Core Team Members are +nominated by existing Core members and voted upon by the existing Core Team +after asking if the potential Member is interested and willing to serve in that +capacity. The Core Team will be initially formed from the set of existing +Contributors who have been granted commit rights as of late 2015. + +When considering potential Members, the Core Team will look at candidates with +a comprehensive view of their contributions. This will include but is not +limited to code, code review, infrastructure work, mailing list and chat +participation, community help/building, education and outreach, design work, +etc. We are deliberately not setting arbitrary quantitative metrics (like “100 +commits in this repo”) to avoid encouraging behavior that plays to the metrics +rather than the project’s overall well-being. We want to encourage a diverse +array of backgrounds, viewpoints and talents in our team, which is why we +explicitly do not define code as the sole metric on which Core Team membership +will be evaluated. + +If a Core Team member becomes inactive in the project for a period of one year, +they will be considered for removal from the Core Team. Before removal, +inactive Member will be approached by the BDFL to see if they plan on returning +to active participation. If not they will be removed immediately upon a Core +Team vote. If they plan on returning to active participation soon, they will be +given a grace period of one year. If they don't return to active participation +within that time period they will be removed by vote of the Core Team without +further grace period. All former Core Team members can be considered for +membership again at any time in the future, like any other Project Contributor. +Retired Core Team members will be listed on the project website, acknowledging +the period during which they were active in the Core Team. + +The Core Team reserves the right to eject current Members, other than the BDFL, +if they are deemed to be actively harmful to the project’s well-being, and +attempts at communication and conflict resolution have failed. + +### Conflict of interest + +It is expected that the BDFL and Core Team Members will be employed at a wide +range of companies, universities and non-profit organizations. 
Because of this,
+it is possible that Members will have conflicts of
+interest. Such conflicts of
+interest include, but are not limited to:
+
+- Financial interests, such as investments, employment or contracting work,
+  outside of The Project that may influence their work on The Project.
+- Access to proprietary information of their employer that could potentially
+  leak into their work with the Project.
+
+All members of the Core Team, BDFL included, shall disclose to the rest of the
+Core Team any conflict of interest they may have. Members with a conflict of
+interest in a particular issue may participate in Core Team discussions on that
+issue, but must recuse themselves from voting on the issue. If the BDFL has
+recused himself/herself for a particular decision, they will appoint a
+substitute BDFL for that decision.
+
+### Private communications of the Core Team
+
+Unless specifically required, all Core Team discussions and activities will be
+public and done in collaboration and discussion with the Project Contributors
+and Community. The Core Team will have a private mailing list that will be used
+sparingly and only when a specific matter requires privacy. When private
+communications and decisions are needed, the Core Team will do its best to
+summarize those to the Community after eliding personal/private/sensitive
+information that should not be posted to the public internet.
+
+### Subcommittees
+
+The Core Team can create subcommittees that provide leadership and guidance for
+specific aspects of the project. Like the Core Team as a whole, subcommittees
+should conduct their business in an open and public manner unless privacy is
+specifically called for. Private subcommittee communications should happen on
+the main private mailing list of the Core Team unless specifically called for.
+
+Question: if the BDFL is not on a subcommittee, do they still have override
+authority?
+
+Suggestion: they do, but they should appoint a delegate who plays that role
+most of the time, and explicit BDFL intervention is sought only if the
+committee disagrees with that delegate’s decision and no resolution is possible
+within the team. This is different from a BDFL delegate for a specific decision
+(or a recusal situation), where the BDFL is literally giving up his/her
+authority to someone else in full. It’s more like the “lieutenants” model that
+Linus Torvalds uses.
+
+### NumFOCUS Subcommittee
+
+The Core Team will maintain one narrowly focused subcommittee to manage its
+interactions with NumFOCUS.
+
+- The NumFOCUS Subcommittee is composed of at least 5 persons who manage
+  project funding that comes through NumFOCUS. It is expected that these funds
+  will be spent in a manner that is consistent with the non-profit mission of
+  NumFOCUS and the direction of the Project as determined by the full Core
+  Team.
+- This Subcommittee shall NOT make decisions about the direction, scope or
+  technical direction of the Project.
+- This Subcommittee will have at least 5 members. No more than 2 Subcommittee
+  Members can report to one person (either directly or indirectly) through
+  employment or contracting work (including the reportee, i.e., the reportee + 1
+  is the max). This avoids effective majorities resting on one person.
+
+## Institutional Partners and Funding
+
+The BDFL and Core Team are the primary leadership for the project.
No outside +institution, individual or legal entity has the ability to own, control, usurp +or influence the project other than by participating in the Project as +Contributors and Core Team. However, because institutions are the primary +funding mechanism for the project, it is important to formally acknowledge +institutional participation in the project. These are Institutional Partners. + +An Institutional Contributor is any individual Project Contributor who +contributes to the project as part of their official duties at an Institutional +Partner. Likewise, an Institutional Core Team Member is any Core Team Member +who contributes to the project as part of their official duties at an +Institutional Partner. + +With these definitions, an Institutional Partner is any recognized legal entity +in the United States or elsewhere that employs at least one Institutional +Contributor or Institutional Core Team Member. Institutional Partners can be +for-profit or non-profit entities. + +Institutions become eligible to become an Institutional Partner by employing +individuals who actively contribute to The Project as part of their official +duties. To state this another way, the only way for an Institutional Partner to +influence the project is by actively contributing to the open development of +the project, on equal terms with any other member of the community of +Contributors and Core Team Members. Merely using pandas Software or Services in +an institutional context does not allow an entity to become an Institutional +Partner. Financial gifts do not enable an entity to become an Institutional +Partner. Once an institution becomes eligible for Institutional Partnership, +the Core Team must nominate and approve the Partnership. + +If an existing Institutional Partner no longer has a contributing employee, +they will be given a one-year grace period for other employees to begin +contributing. + +An Institutional Partner is free to pursue funding for their work on The +Project through any legal means. This could involve a non-profit organization +raising money from private foundations and donors or a for-profit company +building proprietary products and services that leverage Project Software and +Services. Funding acquired by Institutional Partners to work on The Project is +called Institutional Funding. However, no funding obtained by an Institutional +Partner can override The Project BDFL and Core Team. If a Partner has funding +to do pandas work and the Core Team decides to not pursue that work as a +project, the Partner is free to pursue it on their own. However in this +situation, that part of the Partner’s work will not be under the pandas +umbrella and cannot use the Project trademarks in a way that suggests a formal +relationship. + +To acknowledge institutional contributions, there are two levels of +Institutional Partners, with associated benefits: + +**Tier 1** = an institution with at least one Institutional Core Team Member + +- Acknowledged on the pandas website, in talks and T-shirts. +- Ability to acknowledge their own funding sources on the pandas website, in + talks and T-shirts. +- Ability to influence the project through the participation of their Core Team + Member. + +**Tier 2** = an institution with at least one Institutional Contributor + +## Breach + +Non-compliance with the terms of the governance documents shall be reported to +the Core Team either through public or private channels as deemed appropriate. 
+ +## Changing the Governance + +Changes to the governance are submitted via a GitHub pull request to The Project's +[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md). +The pull request is then refined in response to public comment and review, with +the goal being consensus in the community. After this open period, a Core Team +Member proposes to the Core Team that the changes be ratified and the pull +request merged (accepting the proposed changes) or proposes that the pull +request be closed without merging (rejecting the proposed changes). The Member +should state the final commit hash in the pull request being proposed for +acceptance or rejection and briefly summarize the pull request. A minimum of +80% of the Core Team must vote and at least 2/3 of the votes must be positive +to carry out the proposed action (fractions of a vote rounded up to the nearest +integer). Since the BDFL holds ultimate authority in The Project, the BDFL has +authority to act alone in accepting or rejecting changes or overriding Core +Team decisions. From 0f3d6debe442b60563972e11d20d5109a9c03d2e Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:10 -0500 Subject: [PATCH 036/184] New translations governance.md (Arabic) --- web/pandas/ar/about/governance.md | 317 ++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 web/pandas/ar/about/governance.md diff --git a/web/pandas/ar/about/governance.md b/web/pandas/ar/about/governance.md new file mode 100644 index 000000000..b37925e89 --- /dev/null +++ b/web/pandas/ar/about/governance.md @@ -0,0 +1,317 @@ +# Project governance + +The official version of this document, along with a list of +individuals and institutions in the roles defined in the governance +section below, is contained in the +[Project governance]({{ base_url }}about/governance.html) +page of the pandas website. + +## The Project + +The pandas Project (The Project) is an open source software project affiliated +with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open +source software for data ingest, data preparation, data analysis, and data +visualization for the Python programming language. The Software developed by +The Project is released under the BSD (or similar) open source license, +developed openly and hosted in public GitHub repositories under the pandas +GitHub organization. Examples of Project Software +include the main pandas code repository and the pandas-stubs library. + +Through its affiliation with NumFOCUS, The Project has the right to receive +tax-deductible donations in the United States of America. + +The Project is developed by a team of distributed developers, called +Contributors. Contributors are individuals who have contributed code, +documentation, designs or other work to one or more Project repositories. +Anyone can be a Contributor. Contributors can be affiliated with any legal +entity or none. Contributors participate in the project by submitting, +reviewing and discussing GitHub Pull Requests and Issues and participating in +open and public Project discussions on GitHub, mailing lists, and +elsewhere. The foundation of Project participation is openness and +transparency. 
+ +Here is a list of the current Contributors to the main pandas repository: + +[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors) + +There are also many other Contributors listed in the logs of other repositories of +the pandas project. + +The Project Community consists of all Contributors and Users of the Project. +Contributors work on behalf of and are responsible to the larger Project +Community and we strive to keep the barrier between Contributors and Users as +low as possible. + +The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation +([https://numfocus.org](https://numfocus.org)), which serves as its fiscal +sponsor, may hold project trademarks and other intellectual property, helps +manage project donations and acts as a parent legal entity. NumFOCUS is the +only legal entity that has a formal relationship with the project (see +Institutional Partners section below). + +## Governance + +This section describes the governance and leadership model of The Project. + +The foundations of Project governance are: + +- Openness & Transparency +- Active Contribution +- Institutional Neutrality + +Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and +subset of Contributors, called the Core Team, whose active and consistent +contributions have been recognized by their receiving “commit rights” to the +Project GitHub repositories. In general all Project decisions are made through +consensus among the Core Team with input from the Community. The BDFL can, but +rarely chooses to, override the Core Team and make a final decision on a +matter. + +While this approach has served us well, as the Project grows and faces more +legal and financial decisions and interacts with other institutions, we see a +need for a more formal governance model. Moving forward The Project leadership +will consist of a BDFL and Core Team. We view this governance model as the +formalization of what we are already doing, rather than a change in direction. + +### BDFL + +The Project will have a BDFL (Benevolent Dictator for Life), who is currently +Wes McKinney. As Dictator, the BDFL has the authority to make all final +decisions for The Project. As Benevolent, the BDFL, in practice chooses to +defer that authority to the consensus of the community discussion channels and +the Core Team. It is expected, and in the past has been the case, that the BDFL +will only rarely assert his/her final authority. Because it is rarely used, we +refer to BDFL’s final authority as a “special” or “overriding” vote. When it +does occur, the BDFL override typically happens in situations where there is a +deadlock in the Core Team or if the Core Team ask the BDFL to make a decision +on a specific matter. To ensure the benevolence of the BDFL, The Project +encourages others to fork the project if they disagree with the overall +direction the BDFL is taking. The BDFL is chair of the Core Team (see below) +and may delegate his/her authority on a particular decision or set of decisions +to any other Core Team Member at his/her discretion. + +The BDFL can appoint his/her successor, but it is expected that the Core Team +would be consulted on this decision. If the BDFL is unable to appoint a +successor (e.g. due to death or illness), the Core Team will choose a successor +by voting with at least 2/3 of the Core Team members voting in favor of the +chosen successor. At least 80% of the Core Team must participate in the +vote. 
If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core
+Team members shall propose the BDFL candidates to the Main NumFOCUS board, who
+will then make the final decision.
+
+### Core Team
+
+The Project's Core Team will consist of Project Contributors who have produced
+contributions that are substantial in quality and quantity, and sustained over
+at least one year. The overall role of the Core Team is to ensure, through
+working with the BDFL and taking input from the Community, the long-term
+well-being of the project, both technically and as a community.
+
+During the everyday project activities, Core Team Members participate in all
+discussions, code review and other project activities as peers with all other
+Contributors and the Community. In these everyday activities, Core Team
+Members do not have any special power or privilege through their membership on
+the Core Team. However, it is expected that because of the quality and quantity
+of their contributions and their expert knowledge of the Project Software that
+the Core Team will provide useful guidance, both technical and in terms of
+project direction, to potentially less experienced contributors.
+
+The Core Team and its Members play a special role in certain situations.
+In particular, the Core Team may:
+
+- Make decisions about the overall scope, vision and direction of the
+  project.
+- Make decisions about strategic collaborations with other organizations or
+  individuals.
+- Make decisions about specific technical issues, features, bugs and pull
+  requests. They are the primary mechanism of guiding the code review process
+  and merging pull requests.
+- Make decisions about the Services that are run by The Project and manage
+  those Services for the benefit of the Project and Community.
+- Make decisions when regular community discussion doesn't produce consensus
+  on an issue in a reasonable time frame.
+
+### Core Team membership
+
+To become eligible for being a Core Team Member, an individual must be a Project
+Contributor who has produced contributions that are substantial in quality and
+quantity, and sustained over at least one year. Potential Core Team Members are
+nominated by existing Core Team Members and voted upon by the existing Core Team
+after asking if the potential Member is interested and willing to serve in that
+capacity. The Core Team will be initially formed from the set of existing
+Contributors who have been granted commit rights as of late 2015.
+
+When considering potential Members, the Core Team will look at candidates with
+a comprehensive view of their contributions. This will include, but is not
+limited to, code, code review, infrastructure work, mailing list and chat
+participation, community help/building, education and outreach, design work,
+etc. We are deliberately not setting arbitrary quantitative metrics (like “100
+commits in this repo”) to avoid encouraging behavior that plays to the metrics
+rather than the project’s overall well-being. We want to encourage a diverse
+array of backgrounds, viewpoints and talents in our team, which is why we
+explicitly do not define code as the sole metric on which Core Team membership
+will be evaluated.
+
+If a Core Team member becomes inactive in the project for a period of one year,
+they will be considered for removal from the Core Team. Before removal,
+the inactive Member will be approached by the BDFL to see if they plan on
+returning to active participation. If not, they will be removed immediately
+upon a Core Team vote.
If they plan on returning to active participation soon, they will be +given a grace period of one year. If they don't return to active participation +within that time period they will be removed by vote of the Core Team without +further grace period. All former Core Team members can be considered for +membership again at any time in the future, like any other Project Contributor. +Retired Core Team members will be listed on the project website, acknowledging +the period during which they were active in the Core Team. + +The Core Team reserves the right to eject current Members, other than the BDFL, +if they are deemed to be actively harmful to the project’s well-being, and +attempts at communication and conflict resolution have failed. + +### Conflict of interest + +It is expected that the BDFL and Core Team Members will be employed at a wide +range of companies, universities and non-profit organizations. Because of this, +it is possible that Members will have conflict of interests. Such conflict of +interests include, but are not limited to: + +- Financial interests, such as investments, employment or contracting work, + outside of The Project that may influence their work on The Project. +- Access to proprietary information of their employer that could potentially + leak into their work with the Project. + +All members of the Core Team, BDFL included, shall disclose to the rest of the +Core Team any conflict of interest they may have. Members with a conflict of +interest in a particular issue may participate in Core Team discussions on that +issue, but must recuse themselves from voting on the issue. If the BDFL has +recused his/herself for a particular decision, they will appoint a substitute +BDFL for that decision. + +### Private communications of the Core Team + +Unless specifically required, all Core Team discussions and activities will be +public and done in collaboration and discussion with the Project Contributors +and Community. The Core Team will have a private mailing list that will be used +sparingly and only when a specific matter requires privacy. When private +communications and decisions are needed, the Core Team will do its best to +summarize those to the Community after eliding personal/private/sensitive +information that should not be posted to the public internet. + +### Subcommittees + +The Core Team can create subcommittees that provide leadership and guidance for +specific aspects of the project. Like the Core Team as a whole, subcommittees +should conduct their business in an open and public manner unless privacy is +specifically called for. Private subcommittee communications should happen on +the main private mailing list of the Core Team unless specifically called for. + +Question: if the BDFL is not on a subcommittee, do they still have override +authority? + +Suggestion: they do, but they should appoint a delegate who plays that role +most of the time, and explicit BDFL intervention is sought only if the +committee disagrees with that delegate’s decision and no resolution is possible +within the team. This is different from a BDFL delegate for a specific decision +(or a recusal situation), where the BDFL is literally giving up his/her +authority to someone else in full. It’s more like what Linus Torvalds uses with his +“lieutenants” model. + +### NumFOCUS Subcommittee + +The Core Team will maintain one narrowly focused subcommittee to manage its +interactions with NumFOCUS. 
+ +- The NumFOCUS Subcommittee is comprised of at least 5 persons who manage + project funding that comes through NumFOCUS. It is expected that these funds + will be spent in a manner that is consistent with the non-profit mission of + NumFOCUS and the direction of the Project as determined by the full Core + Team. +- This Subcommittee shall NOT make decisions about the direction, scope or + technical direction of the Project. +- This Subcommittee will have at least 5 members. No more than 2 Subcommittee + Members can report to one person (either directly or indirectly) through + employment or contracting work (including the reportee, i.e. the reportee + 1 + is the max). This avoids effective majorities resting on one person. + +## Institutional Partners and Funding + +The BDFL and Core Team are the primary leadership for the project. No outside +institution, individual or legal entity has the ability to own, control, usurp +or influence the project other than by participating in the Project as +Contributors and Core Team. However, because institutions are the primary +funding mechanism for the project, it is important to formally acknowledge +institutional participation in the project. These are Institutional Partners. + +An Institutional Contributor is any individual Project Contributor who +contributes to the project as part of their official duties at an Institutional +Partner. Likewise, an Institutional Core Team Member is any Core Team Member +who contributes to the project as part of their official duties at an +Institutional Partner. + +With these definitions, an Institutional Partner is any recognized legal entity +in the United States or elsewhere that employs at least one Institutional +Contributor or Institutional Core Team Member. Institutional Partners can be +for-profit or non-profit entities. + +Institutions become eligible to become an Institutional Partner by employing +individuals who actively contribute to The Project as part of their official +duties. To state this another way, the only way for an Institutional Partner to +influence the project is by actively contributing to the open development of +the project, on equal terms with any other member of the community of +Contributors and Core Team Members. Merely using pandas Software or Services in +an institutional context does not allow an entity to become an Institutional +Partner. Financial gifts do not enable an entity to become an Institutional +Partner. Once an institution becomes eligible for Institutional Partnership, +the Core Team must nominate and approve the Partnership. + +If an existing Institutional Partner no longer has a contributing employee, +they will be given a one-year grace period for other employees to begin +contributing. + +An Institutional Partner is free to pursue funding for their work on The +Project through any legal means. This could involve a non-profit organization +raising money from private foundations and donors or a for-profit company +building proprietary products and services that leverage Project Software and +Services. Funding acquired by Institutional Partners to work on The Project is +called Institutional Funding. However, no funding obtained by an Institutional +Partner can override The Project BDFL and Core Team. If a Partner has funding +to do pandas work and the Core Team decides to not pursue that work as a +project, the Partner is free to pursue it on their own. 
However in this +situation, that part of the Partner’s work will not be under the pandas +umbrella and cannot use the Project trademarks in a way that suggests a formal +relationship. + +To acknowledge institutional contributions, there are two levels of +Institutional Partners, with associated benefits: + +**Tier 1** = an institution with at least one Institutional Core Team Member + +- Acknowledged on the pandas website, in talks and T-shirts. +- Ability to acknowledge their own funding sources on the pandas website, in + talks and T-shirts. +- Ability to influence the project through the participation of their Core Team + Member. + +**Tier 2** = an institution with at least one Institutional Contributor + +## Breach + +Non-compliance with the terms of the governance documents shall be reported to +the Core Team either through public or private channels as deemed appropriate. + +## Changing the Governance + +Changes to the governance are submitted via a GitHub pull request to The Project's +[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md). +The pull request is then refined in response to public comment and review, with +the goal being consensus in the community. After this open period, a Core Team +Member proposes to the Core Team that the changes be ratified and the pull +request merged (accepting the proposed changes) or proposes that the pull +request be closed without merging (rejecting the proposed changes). The Member +should state the final commit hash in the pull request being proposed for +acceptance or rejection and briefly summarize the pull request. A minimum of +80% of the Core Team must vote and at least 2/3 of the votes must be positive +to carry out the proposed action (fractions of a vote rounded up to the nearest +integer). Since the BDFL holds ultimate authority in The Project, the BDFL has +authority to act alone in accepting or rejecting changes or overriding Core +Team decisions. From 769e64998551a55bad19ee0c331e9c58126735aa Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:11 -0500 Subject: [PATCH 037/184] New translations governance.md (Catalan) --- web/pandas/ca/about/governance.md | 317 ++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 web/pandas/ca/about/governance.md diff --git a/web/pandas/ca/about/governance.md b/web/pandas/ca/about/governance.md new file mode 100644 index 000000000..b37925e89 --- /dev/null +++ b/web/pandas/ca/about/governance.md @@ -0,0 +1,317 @@ +# Project governance + +The official version of this document, along with a list of +individuals and institutions in the roles defined in the governance +section below, is contained in the +[Project governance]({{ base_url }}about/governance.html) +page of the pandas website. + +## The Project + +The pandas Project (The Project) is an open source software project affiliated +with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open +source software for data ingest, data preparation, data analysis, and data +visualization for the Python programming language. The Software developed by +The Project is released under the BSD (or similar) open source license, +developed openly and hosted in public GitHub repositories under the pandas +GitHub organization. Examples of Project Software +include the main pandas code repository and the pandas-stubs library. 
+ +Through its affiliation with NumFOCUS, The Project has the right to receive +tax-deductible donations in the United States of America. + +The Project is developed by a team of distributed developers, called +Contributors. Contributors are individuals who have contributed code, +documentation, designs or other work to one or more Project repositories. +Anyone can be a Contributor. Contributors can be affiliated with any legal +entity or none. Contributors participate in the project by submitting, +reviewing and discussing GitHub Pull Requests and Issues and participating in +open and public Project discussions on GitHub, mailing lists, and +elsewhere. The foundation of Project participation is openness and +transparency. + +Here is a list of the current Contributors to the main pandas repository: + +[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors) + +There are also many other Contributors listed in the logs of other repositories of +the pandas project. + +The Project Community consists of all Contributors and Users of the Project. +Contributors work on behalf of and are responsible to the larger Project +Community and we strive to keep the barrier between Contributors and Users as +low as possible. + +The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation +([https://numfocus.org](https://numfocus.org)), which serves as its fiscal +sponsor, may hold project trademarks and other intellectual property, helps +manage project donations and acts as a parent legal entity. NumFOCUS is the +only legal entity that has a formal relationship with the project (see +Institutional Partners section below). + +## Governance + +This section describes the governance and leadership model of The Project. + +The foundations of Project governance are: + +- Openness & Transparency +- Active Contribution +- Institutional Neutrality + +Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and +subset of Contributors, called the Core Team, whose active and consistent +contributions have been recognized by their receiving “commit rights” to the +Project GitHub repositories. In general all Project decisions are made through +consensus among the Core Team with input from the Community. The BDFL can, but +rarely chooses to, override the Core Team and make a final decision on a +matter. + +While this approach has served us well, as the Project grows and faces more +legal and financial decisions and interacts with other institutions, we see a +need for a more formal governance model. Moving forward The Project leadership +will consist of a BDFL and Core Team. We view this governance model as the +formalization of what we are already doing, rather than a change in direction. + +### BDFL + +The Project will have a BDFL (Benevolent Dictator for Life), who is currently +Wes McKinney. As Dictator, the BDFL has the authority to make all final +decisions for The Project. As Benevolent, the BDFL, in practice chooses to +defer that authority to the consensus of the community discussion channels and +the Core Team. It is expected, and in the past has been the case, that the BDFL +will only rarely assert his/her final authority. Because it is rarely used, we +refer to BDFL’s final authority as a “special” or “overriding” vote. When it +does occur, the BDFL override typically happens in situations where there is a +deadlock in the Core Team or if the Core Team ask the BDFL to make a decision +on a specific matter. 
To ensure the benevolence of the BDFL, The Project +encourages others to fork the project if they disagree with the overall +direction the BDFL is taking. The BDFL is chair of the Core Team (see below) +and may delegate his/her authority on a particular decision or set of decisions +to any other Core Team Member at his/her discretion. + +The BDFL can appoint his/her successor, but it is expected that the Core Team +would be consulted on this decision. If the BDFL is unable to appoint a +successor (e.g. due to death or illness), the Core Team will choose a successor +by voting with at least 2/3 of the Core Team members voting in favor of the +chosen successor. At least 80% of the Core Team must participate in the +vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core +Team members shall propose the BDFL candidates to the Main NumFOCUS board, who +will then make the final decision. + +### Core Team + +The Project's Core Team will consist of Project Contributors who have produced +contributions that are substantial in quality and quantity, and sustained over +at least one year. The overall role of the Core Team is to ensure, through +working with the BDFL and taking input from the Community, the long-term +well-being of the project, both technically and as a community. + +During the everyday project activities, Core Team participate in all +discussions, code review and other project activities as peers with all other +Contributors and the Community. In these everyday activities, Core Team do not +have any special power or privilege through their membership on the Core +Team. However, it is expected that because of the quality and quantity of their +contributions and their expert knowledge of the Project Software that the Core +Team will provide useful guidance, both technical and in terms of project +direction, to potentially less experienced contributors. + +The Core Team and its Members play a special role in certain situations. +In particular, the Core Team may: + +- Make decisions about the overall scope, vision and direction of the + project. +- Make decisions about strategic collaborations with other organizations or + individuals. +- Make decisions about specific technical issues, features, bugs and pull + requests. They are the primary mechanism of guiding the code review process + and merging pull requests. +- Make decisions about the Services that are run by The Project and manage + those Services for the benefit of the Project and Community. +- Make decisions when regular community discussion doesn't produce consensus + on an issue in a reasonable time frame. + +### Core Team membership + +To become eligible for being a Core Team Member an individual must be a Project +Contributor who has produced contributions that are substantial in quality and +quantity, and sustained over at least one year. Potential Core Team Members are +nominated by existing Core members and voted upon by the existing Core Team +after asking if the potential Member is interested and willing to serve in that +capacity. The Core Team will be initially formed from the set of existing +Contributors who have been granted commit rights as of late 2015. + +When considering potential Members, the Core Team will look at candidates with +a comprehensive view of their contributions. This will include but is not +limited to code, code review, infrastructure work, mailing list and chat +participation, community help/building, education and outreach, design work, +etc. 
We are deliberately not setting arbitrary quantitative metrics (like “100 +commits in this repo”) to avoid encouraging behavior that plays to the metrics +rather than the project’s overall well-being. We want to encourage a diverse +array of backgrounds, viewpoints and talents in our team, which is why we +explicitly do not define code as the sole metric on which Core Team membership +will be evaluated. + +If a Core Team member becomes inactive in the project for a period of one year, +they will be considered for removal from the Core Team. Before removal, +inactive Member will be approached by the BDFL to see if they plan on returning +to active participation. If not they will be removed immediately upon a Core +Team vote. If they plan on returning to active participation soon, they will be +given a grace period of one year. If they don't return to active participation +within that time period they will be removed by vote of the Core Team without +further grace period. All former Core Team members can be considered for +membership again at any time in the future, like any other Project Contributor. +Retired Core Team members will be listed on the project website, acknowledging +the period during which they were active in the Core Team. + +The Core Team reserves the right to eject current Members, other than the BDFL, +if they are deemed to be actively harmful to the project’s well-being, and +attempts at communication and conflict resolution have failed. + +### Conflict of interest + +It is expected that the BDFL and Core Team Members will be employed at a wide +range of companies, universities and non-profit organizations. Because of this, +it is possible that Members will have conflict of interests. Such conflict of +interests include, but are not limited to: + +- Financial interests, such as investments, employment or contracting work, + outside of The Project that may influence their work on The Project. +- Access to proprietary information of their employer that could potentially + leak into their work with the Project. + +All members of the Core Team, BDFL included, shall disclose to the rest of the +Core Team any conflict of interest they may have. Members with a conflict of +interest in a particular issue may participate in Core Team discussions on that +issue, but must recuse themselves from voting on the issue. If the BDFL has +recused his/herself for a particular decision, they will appoint a substitute +BDFL for that decision. + +### Private communications of the Core Team + +Unless specifically required, all Core Team discussions and activities will be +public and done in collaboration and discussion with the Project Contributors +and Community. The Core Team will have a private mailing list that will be used +sparingly and only when a specific matter requires privacy. When private +communications and decisions are needed, the Core Team will do its best to +summarize those to the Community after eliding personal/private/sensitive +information that should not be posted to the public internet. + +### Subcommittees + +The Core Team can create subcommittees that provide leadership and guidance for +specific aspects of the project. Like the Core Team as a whole, subcommittees +should conduct their business in an open and public manner unless privacy is +specifically called for. Private subcommittee communications should happen on +the main private mailing list of the Core Team unless specifically called for. 
+ +Question: if the BDFL is not on a subcommittee, do they still have override +authority? + +Suggestion: they do, but they should appoint a delegate who plays that role +most of the time, and explicit BDFL intervention is sought only if the +committee disagrees with that delegate’s decision and no resolution is possible +within the team. This is different from a BDFL delegate for a specific decision +(or a recusal situation), where the BDFL is literally giving up his/her +authority to someone else in full. It’s more like what Linus Torvalds uses with his +“lieutenants” model. + +### NumFOCUS Subcommittee + +The Core Team will maintain one narrowly focused subcommittee to manage its +interactions with NumFOCUS. + +- The NumFOCUS Subcommittee is comprised of at least 5 persons who manage + project funding that comes through NumFOCUS. It is expected that these funds + will be spent in a manner that is consistent with the non-profit mission of + NumFOCUS and the direction of the Project as determined by the full Core + Team. +- This Subcommittee shall NOT make decisions about the direction, scope or + technical direction of the Project. +- This Subcommittee will have at least 5 members. No more than 2 Subcommittee + Members can report to one person (either directly or indirectly) through + employment or contracting work (including the reportee, i.e. the reportee + 1 + is the max). This avoids effective majorities resting on one person. + +## Institutional Partners and Funding + +The BDFL and Core Team are the primary leadership for the project. No outside +institution, individual or legal entity has the ability to own, control, usurp +or influence the project other than by participating in the Project as +Contributors and Core Team. However, because institutions are the primary +funding mechanism for the project, it is important to formally acknowledge +institutional participation in the project. These are Institutional Partners. + +An Institutional Contributor is any individual Project Contributor who +contributes to the project as part of their official duties at an Institutional +Partner. Likewise, an Institutional Core Team Member is any Core Team Member +who contributes to the project as part of their official duties at an +Institutional Partner. + +With these definitions, an Institutional Partner is any recognized legal entity +in the United States or elsewhere that employs at least one Institutional +Contributor or Institutional Core Team Member. Institutional Partners can be +for-profit or non-profit entities. + +Institutions become eligible to become an Institutional Partner by employing +individuals who actively contribute to The Project as part of their official +duties. To state this another way, the only way for an Institutional Partner to +influence the project is by actively contributing to the open development of +the project, on equal terms with any other member of the community of +Contributors and Core Team Members. Merely using pandas Software or Services in +an institutional context does not allow an entity to become an Institutional +Partner. Financial gifts do not enable an entity to become an Institutional +Partner. Once an institution becomes eligible for Institutional Partnership, +the Core Team must nominate and approve the Partnership. + +If an existing Institutional Partner no longer has a contributing employee, +they will be given a one-year grace period for other employees to begin +contributing. 
+ +An Institutional Partner is free to pursue funding for their work on The +Project through any legal means. This could involve a non-profit organization +raising money from private foundations and donors or a for-profit company +building proprietary products and services that leverage Project Software and +Services. Funding acquired by Institutional Partners to work on The Project is +called Institutional Funding. However, no funding obtained by an Institutional +Partner can override The Project BDFL and Core Team. If a Partner has funding +to do pandas work and the Core Team decides to not pursue that work as a +project, the Partner is free to pursue it on their own. However in this +situation, that part of the Partner’s work will not be under the pandas +umbrella and cannot use the Project trademarks in a way that suggests a formal +relationship. + +To acknowledge institutional contributions, there are two levels of +Institutional Partners, with associated benefits: + +**Tier 1** = an institution with at least one Institutional Core Team Member + +- Acknowledged on the pandas website, in talks and T-shirts. +- Ability to acknowledge their own funding sources on the pandas website, in + talks and T-shirts. +- Ability to influence the project through the participation of their Core Team + Member. + +**Tier 2** = an institution with at least one Institutional Contributor + +## Breach + +Non-compliance with the terms of the governance documents shall be reported to +the Core Team either through public or private channels as deemed appropriate. + +## Changing the Governance + +Changes to the governance are submitted via a GitHub pull request to The Project's +[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md). +The pull request is then refined in response to public comment and review, with +the goal being consensus in the community. After this open period, a Core Team +Member proposes to the Core Team that the changes be ratified and the pull +request merged (accepting the proposed changes) or proposes that the pull +request be closed without merging (rejecting the proposed changes). The Member +should state the final commit hash in the pull request being proposed for +acceptance or rejection and briefly summarize the pull request. A minimum of +80% of the Core Team must vote and at least 2/3 of the votes must be positive +to carry out the proposed action (fractions of a vote rounded up to the nearest +integer). Since the BDFL holds ultimate authority in The Project, the BDFL has +authority to act alone in accepting or rejecting changes or overriding Core +Team decisions. From d9ea01148d04c9d90a811deeec468a2c8a94923a Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:13 -0500 Subject: [PATCH 038/184] New translations governance.md (Japanese) --- web/pandas/ja/about/governance.md | 317 ++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 web/pandas/ja/about/governance.md diff --git a/web/pandas/ja/about/governance.md b/web/pandas/ja/about/governance.md new file mode 100644 index 000000000..b37925e89 --- /dev/null +++ b/web/pandas/ja/about/governance.md @@ -0,0 +1,317 @@ +# Project governance + +The official version of this document, along with a list of +individuals and institutions in the roles defined in the governance +section below, is contained in the +[Project governance]({{ base_url }}about/governance.html) +page of the pandas website. 
+ +## The Project + +The pandas Project (The Project) is an open source software project affiliated +with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open +source software for data ingest, data preparation, data analysis, and data +visualization for the Python programming language. The Software developed by +The Project is released under the BSD (or similar) open source license, +developed openly and hosted in public GitHub repositories under the pandas +GitHub organization. Examples of Project Software +include the main pandas code repository and the pandas-stubs library. + +Through its affiliation with NumFOCUS, The Project has the right to receive +tax-deductible donations in the United States of America. + +The Project is developed by a team of distributed developers, called +Contributors. Contributors are individuals who have contributed code, +documentation, designs or other work to one or more Project repositories. +Anyone can be a Contributor. Contributors can be affiliated with any legal +entity or none. Contributors participate in the project by submitting, +reviewing and discussing GitHub Pull Requests and Issues and participating in +open and public Project discussions on GitHub, mailing lists, and +elsewhere. The foundation of Project participation is openness and +transparency. + +Here is a list of the current Contributors to the main pandas repository: + +[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors) + +There are also many other Contributors listed in the logs of other repositories of +the pandas project. + +The Project Community consists of all Contributors and Users of the Project. +Contributors work on behalf of and are responsible to the larger Project +Community and we strive to keep the barrier between Contributors and Users as +low as possible. + +The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation +([https://numfocus.org](https://numfocus.org)), which serves as its fiscal +sponsor, may hold project trademarks and other intellectual property, helps +manage project donations and acts as a parent legal entity. NumFOCUS is the +only legal entity that has a formal relationship with the project (see +Institutional Partners section below). + +## Governance + +This section describes the governance and leadership model of The Project. + +The foundations of Project governance are: + +- Openness & Transparency +- Active Contribution +- Institutional Neutrality + +Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and +subset of Contributors, called the Core Team, whose active and consistent +contributions have been recognized by their receiving “commit rights” to the +Project GitHub repositories. In general all Project decisions are made through +consensus among the Core Team with input from the Community. The BDFL can, but +rarely chooses to, override the Core Team and make a final decision on a +matter. + +While this approach has served us well, as the Project grows and faces more +legal and financial decisions and interacts with other institutions, we see a +need for a more formal governance model. Moving forward The Project leadership +will consist of a BDFL and Core Team. We view this governance model as the +formalization of what we are already doing, rather than a change in direction. + +### BDFL + +The Project will have a BDFL (Benevolent Dictator for Life), who is currently +Wes McKinney. 
As Dictator, the BDFL has the authority to make all final
+decisions for The Project. As Benevolent, the BDFL, in practice, chooses to
+defer that authority to the consensus of the community discussion channels and
+the Core Team. It is expected, and in the past has been the case, that the BDFL
+will only rarely assert his/her final authority. Because it is rarely used, we
+refer to the BDFL’s final authority as a “special” or “overriding” vote. When
+it does occur, the BDFL override typically happens in situations where there is
+a deadlock in the Core Team or if the Core Team asks the BDFL to make a
+decision on a specific matter. To ensure the benevolence of the BDFL, The
+Project encourages others to fork the project if they disagree with the overall
+direction the BDFL is taking. The BDFL is chair of the Core Team (see below)
+and may delegate his/her authority on a particular decision or set of decisions
+to any other Core Team Member at his/her discretion.
+
+The BDFL can appoint his/her successor, but it is expected that the Core Team
+would be consulted on this decision. If the BDFL is unable to appoint a
+successor (e.g. due to death or illness), the Core Team will choose a successor
+by a vote in which at least 2/3 of the Core Team members vote in favor of the
+chosen successor. At least 80% of the Core Team must participate in the
+vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core
+Team members shall propose the BDFL candidates to the Main NumFOCUS board, who
+will then make the final decision.
+
+### Core Team
+
+The Project's Core Team will consist of Project Contributors who have produced
+contributions that are substantial in quality and quantity, and sustained over
+at least one year. The overall role of the Core Team is to ensure, through
+working with the BDFL and taking input from the Community, the long-term
+well-being of the project, both technically and as a community.
+
+During everyday project activities, Core Team Members participate in all
+discussions, code review and other project activities as peers with all other
+Contributors and the Community. In these everyday activities, Core Team Members
+do not have any special power or privilege through their membership on the Core
+Team. However, it is expected that, because of the quality and quantity of
+their contributions and their expert knowledge of the Project Software, the
+Core Team will provide useful guidance, both technical and in terms of project
+direction, to potentially less experienced contributors.
+
+The Core Team and its Members play a special role in certain situations.
+In particular, the Core Team may:
+
+- Make decisions about the overall scope, vision and direction of the
+  project.
+- Make decisions about strategic collaborations with other organizations or
+  individuals.
+- Make decisions about specific technical issues, features, bugs and pull
+  requests. They are the primary mechanism for guiding the code review process
+  and merging pull requests.
+- Make decisions about the Services that are run by The Project and manage
+  those Services for the benefit of the Project and Community.
+- Make decisions when regular community discussion doesn't produce consensus
+  on an issue in a reasonable time frame.
+
+### Core Team membership
+
+To become eligible to be a Core Team Member, an individual must be a Project
+Contributor who has produced contributions that are substantial in quality and
+quantity, and sustained over at least one year. Potential Core Team Members are
+nominated by existing Core Team Members and voted upon by the existing Core
+Team after asking if the potential Member is interested and willing to serve in
+that capacity. The Core Team will be initially formed from the set of existing
+Contributors who have been granted commit rights as of late 2015.
+
+When considering potential Members, the Core Team will look at candidates with
+a comprehensive view of their contributions. This will include but is not
+limited to code, code review, infrastructure work, mailing list and chat
+participation, community help/building, education and outreach, design work,
+etc. We are deliberately not setting arbitrary quantitative metrics (like “100
+commits in this repo”) to avoid encouraging behavior that plays to the metrics
+rather than the project’s overall well-being. We want to encourage a diverse
+array of backgrounds, viewpoints and talents in our team, which is why we
+explicitly do not define code as the sole metric on which Core Team membership
+will be evaluated.
+
+If a Core Team member becomes inactive in the project for a period of one year,
+they will be considered for removal from the Core Team. Before removal, the
+inactive Member will be approached by the BDFL to see if they plan on returning
+to active participation. If not, they will be removed immediately upon a Core
+Team vote. If they plan on returning to active participation soon, they will be
+given a grace period of one year. If they don't return to active participation
+within that time period, they will be removed by a vote of the Core Team
+without further grace period. All former Core Team members can be considered
+for membership again at any time in the future, like any other Project
+Contributor. Retired Core Team members will be listed on the project website,
+acknowledging the period during which they were active in the Core Team.
+
+The Core Team reserves the right to eject current Members, other than the BDFL,
+if they are deemed to be actively harmful to the project’s well-being, and
+attempts at communication and conflict resolution have failed.
+
+### Conflict of interest
+
+It is expected that the BDFL and Core Team Members will be employed at a wide
+range of companies, universities and non-profit organizations. Because of this,
+it is possible that Members will have conflicts of interest. Such conflicts of
+interest include, but are not limited to:
+
+- Financial interests, such as investments, employment or contracting work,
+  outside of The Project that may influence their work on The Project.
+- Access to proprietary information of their employer that could potentially
+  leak into their work with the Project.
+
+All members of the Core Team, BDFL included, shall disclose to the rest of the
+Core Team any conflict of interest they may have. Members with a conflict of
+interest in a particular issue may participate in Core Team discussions on that
+issue, but must recuse themselves from voting on the issue. If the BDFL has
+recused himself/herself for a particular decision, they will appoint a
+substitute BDFL for that decision.
+
+### Private communications of the Core Team
+
+Unless specifically required, all Core Team discussions and activities will be
+public and done in collaboration and discussion with the Project Contributors
+and Community. The Core Team will have a private mailing list that will be used
+sparingly and only when a specific matter requires privacy. When private
+communications and decisions are needed, the Core Team will do its best to
+summarize those to the Community after eliding personal/private/sensitive
+information that should not be posted to the public internet.
+
+### Subcommittees
+
+The Core Team can create subcommittees that provide leadership and guidance for
+specific aspects of the project. Like the Core Team as a whole, subcommittees
+should conduct their business in an open and public manner unless privacy is
+specifically called for. Private subcommittee communications should happen on
+the main private mailing list of the Core Team unless privacy is specifically
+called for.
+
+If the BDFL is not a member of a subcommittee, the BDFL still retains
+override authority. In practice, however, the BDFL should appoint a delegate
+to play that role most of the time, with explicit BDFL intervention sought
+only if the subcommittee disagrees with the delegate’s decision and no
+resolution is possible within the team. This differs from a BDFL delegate for
+a specific decision (or a recusal situation), where the BDFL gives up his/her
+authority to someone else in full; it is closer to the “lieutenants” model
+used by Linus Torvalds.
+
+### NumFOCUS Subcommittee
+
+The Core Team will maintain one narrowly focused subcommittee to manage its
+interactions with NumFOCUS.
+
+- The NumFOCUS Subcommittee is composed of at least 5 persons who manage
+  project funding that comes through NumFOCUS. It is expected that these funds
+  will be spent in a manner that is consistent with the non-profit mission of
+  NumFOCUS and the direction of the Project as determined by the full Core
+  Team.
+- This Subcommittee shall NOT make decisions about the direction, scope or
+  technical direction of the Project.
+- No more than 2 Subcommittee Members may report to one person (either
+  directly or indirectly) through employment or contracting work, counting
+  the reportee (i.e. the reportee + 1 is the maximum). This avoids effective
+  majorities resting on one person.
+
+## Institutional Partners and Funding
+
+The BDFL and Core Team are the primary leadership for the project. No outside
+institution, individual or legal entity has the ability to own, control, usurp
+or influence the project other than by participating in the Project as
+Contributors and Core Team. However, because institutions are the primary
+funding mechanism for the project, it is important to formally acknowledge
+institutional participation in the project. These are Institutional Partners.
+
+An Institutional Contributor is any individual Project Contributor who
+contributes to the project as part of their official duties at an Institutional
+Partner. Likewise, an Institutional Core Team Member is any Core Team Member
+who contributes to the project as part of their official duties at an
+Institutional Partner.
+
+With these definitions, an Institutional Partner is any recognized legal entity
+in the United States or elsewhere that employs at least one Institutional
+Contributor or Institutional Core Team Member. Institutional Partners can be
+for-profit or non-profit entities.
+
+Institutions become eligible for Institutional Partnership by employing
+individuals who actively contribute to The Project as part of their official
+duties. To state this another way, the only way for an Institutional Partner to
+influence the project is by actively contributing to the open development of
+the project, on equal terms with any other member of the community of
+Contributors and Core Team Members. Merely using pandas Software or Services in
+an institutional context does not allow an entity to become an Institutional
+Partner. Financial gifts do not enable an entity to become an Institutional
+Partner. Once an institution becomes eligible for Institutional Partnership,
+the Core Team must nominate and approve the Partnership.
+
+If an existing Institutional Partner no longer has a contributing employee,
+they will be given a one-year grace period for other employees to begin
+contributing.
+
+An Institutional Partner is free to pursue funding for their work on The
+Project through any legal means. This could involve a non-profit organization
+raising money from private foundations and donors or a for-profit company
+building proprietary products and services that leverage Project Software and
+Services. Funding acquired by Institutional Partners to work on The Project is
+called Institutional Funding. However, no funding obtained by an Institutional
+Partner can override The Project BDFL and Core Team. If a Partner has funding
+to do pandas work and the Core Team decides not to pursue that work as a
+project, the Partner is free to pursue it on their own. However, in this
+situation, that part of the Partner’s work will not be under the pandas
+umbrella and cannot use the Project trademarks in a way that suggests a formal
+relationship.
+
+To acknowledge institutional contributions, there are two levels of
+Institutional Partners, with associated benefits:
+
+**Tier 1** = an institution with at least one Institutional Core Team Member
+
+- Acknowledged on the pandas website, in talks, and on T-shirts.
+- Ability to acknowledge their own funding sources on the pandas website, in
+  talks, and on T-shirts.
+- Ability to influence the project through the participation of their Core Team
+  Member.
+
+**Tier 2** = an institution with at least one Institutional Contributor
+
+## Breach
+
+Non-compliance with the terms of the governance documents shall be reported to
+the Core Team either through public or private channels as deemed appropriate.
+
+## Changing the Governance
+
+Changes to the governance are submitted via a GitHub pull request to The Project's
+[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md).
+The pull request is then refined in response to public comment and review, with
+the goal being consensus in the community. After this open period, a Core Team
+Member proposes to the Core Team that the changes be ratified and the pull
+request merged (accepting the proposed changes) or proposes that the pull
+request be closed without merging (rejecting the proposed changes). The Member
+should state the final commit hash of the pull request being proposed for
+acceptance or rejection and briefly summarize the pull request. A minimum of
+80% of the Core Team must vote, and at least 2/3 of the votes must be positive
+to carry out the proposed action (fractions of a vote rounded up to the nearest
+integer). Since the BDFL holds ultimate authority in The Project, the BDFL has
+authority to act alone in accepting or rejecting changes or overriding Core
+Team decisions.
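+
+Since the same 80% / 2/3 thresholds govern both this process and BDFL
+succession, here is a minimal, illustrative sketch of the arithmetic. The
+function name is hypothetical, and it assumes “fractions of a vote rounded
+up” means the required counts are ceilings:
+
+```python
+# Hypothetical check of the governance-change vote thresholds: at least
+# 80% of the Core Team must vote, and at least 2/3 of the votes cast
+# must be positive, with fractional requirements rounded up.
+import math
+
+def governance_vote_passes(team_size, votes_cast, votes_in_favor):
+    quorum = math.ceil(0.8 * team_size)     # 80% participation
+    needed = math.ceil(votes_cast * 2 / 3)  # 2/3 of votes cast
+    return votes_cast >= quorum and votes_in_favor >= needed
+
+# With a 12-member Core Team, at least 10 members must vote
+# (0.8 * 12 = 9.6, rounded up to 10), and if 10 vote, at least 7 must
+# be in favor (2/3 * 10 = 6.67, rounded up to 7).
+assert governance_vote_passes(12, votes_cast=10, votes_in_favor=7)
+assert not governance_vote_passes(12, votes_cast=9, votes_in_favor=9)
+```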
From 3243f87674eecf6deee198e636f063725b37f878 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:14 -0500 Subject: [PATCH 039/184] New translations governance.md (Korean) --- web/pandas/ko/about/governance.md | 317 ++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 web/pandas/ko/about/governance.md diff --git a/web/pandas/ko/about/governance.md b/web/pandas/ko/about/governance.md new file mode 100644 index 000000000..b37925e89 --- /dev/null +++ b/web/pandas/ko/about/governance.md @@ -0,0 +1,317 @@ +# Project governance + +The official version of this document, along with a list of +individuals and institutions in the roles defined in the governance +section below, is contained in the +[Project governance]({{ base_url }}about/governance.html) +page of the pandas website. + +## The Project + +The pandas Project (The Project) is an open source software project affiliated +with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open +source software for data ingest, data preparation, data analysis, and data +visualization for the Python programming language. The Software developed by +The Project is released under the BSD (or similar) open source license, +developed openly and hosted in public GitHub repositories under the pandas +GitHub organization. Examples of Project Software +include the main pandas code repository and the pandas-stubs library. + +Through its affiliation with NumFOCUS, The Project has the right to receive +tax-deductible donations in the United States of America. + +The Project is developed by a team of distributed developers, called +Contributors. Contributors are individuals who have contributed code, +documentation, designs or other work to one or more Project repositories. +Anyone can be a Contributor. Contributors can be affiliated with any legal +entity or none. Contributors participate in the project by submitting, +reviewing and discussing GitHub Pull Requests and Issues and participating in +open and public Project discussions on GitHub, mailing lists, and +elsewhere. The foundation of Project participation is openness and +transparency. + +Here is a list of the current Contributors to the main pandas repository: + +[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors) + +There are also many other Contributors listed in the logs of other repositories of +the pandas project. + +The Project Community consists of all Contributors and Users of the Project. +Contributors work on behalf of and are responsible to the larger Project +Community and we strive to keep the barrier between Contributors and Users as +low as possible. + +The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation +([https://numfocus.org](https://numfocus.org)), which serves as its fiscal +sponsor, may hold project trademarks and other intellectual property, helps +manage project donations and acts as a parent legal entity. NumFOCUS is the +only legal entity that has a formal relationship with the project (see +Institutional Partners section below). + +## Governance + +This section describes the governance and leadership model of The Project. 
+ +The foundations of Project governance are: + +- Openness & Transparency +- Active Contribution +- Institutional Neutrality + +Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and +subset of Contributors, called the Core Team, whose active and consistent +contributions have been recognized by their receiving “commit rights” to the +Project GitHub repositories. In general all Project decisions are made through +consensus among the Core Team with input from the Community. The BDFL can, but +rarely chooses to, override the Core Team and make a final decision on a +matter. + +While this approach has served us well, as the Project grows and faces more +legal and financial decisions and interacts with other institutions, we see a +need for a more formal governance model. Moving forward The Project leadership +will consist of a BDFL and Core Team. We view this governance model as the +formalization of what we are already doing, rather than a change in direction. + +### BDFL + +The Project will have a BDFL (Benevolent Dictator for Life), who is currently +Wes McKinney. As Dictator, the BDFL has the authority to make all final +decisions for The Project. As Benevolent, the BDFL, in practice chooses to +defer that authority to the consensus of the community discussion channels and +the Core Team. It is expected, and in the past has been the case, that the BDFL +will only rarely assert his/her final authority. Because it is rarely used, we +refer to BDFL’s final authority as a “special” or “overriding” vote. When it +does occur, the BDFL override typically happens in situations where there is a +deadlock in the Core Team or if the Core Team ask the BDFL to make a decision +on a specific matter. To ensure the benevolence of the BDFL, The Project +encourages others to fork the project if they disagree with the overall +direction the BDFL is taking. The BDFL is chair of the Core Team (see below) +and may delegate his/her authority on a particular decision or set of decisions +to any other Core Team Member at his/her discretion. + +The BDFL can appoint his/her successor, but it is expected that the Core Team +would be consulted on this decision. If the BDFL is unable to appoint a +successor (e.g. due to death or illness), the Core Team will choose a successor +by voting with at least 2/3 of the Core Team members voting in favor of the +chosen successor. At least 80% of the Core Team must participate in the +vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core +Team members shall propose the BDFL candidates to the Main NumFOCUS board, who +will then make the final decision. + +### Core Team + +The Project's Core Team will consist of Project Contributors who have produced +contributions that are substantial in quality and quantity, and sustained over +at least one year. The overall role of the Core Team is to ensure, through +working with the BDFL and taking input from the Community, the long-term +well-being of the project, both technically and as a community. + +During the everyday project activities, Core Team participate in all +discussions, code review and other project activities as peers with all other +Contributors and the Community. In these everyday activities, Core Team do not +have any special power or privilege through their membership on the Core +Team. 
However, it is expected that because of the quality and quantity of their +contributions and their expert knowledge of the Project Software that the Core +Team will provide useful guidance, both technical and in terms of project +direction, to potentially less experienced contributors. + +The Core Team and its Members play a special role in certain situations. +In particular, the Core Team may: + +- Make decisions about the overall scope, vision and direction of the + project. +- Make decisions about strategic collaborations with other organizations or + individuals. +- Make decisions about specific technical issues, features, bugs and pull + requests. They are the primary mechanism of guiding the code review process + and merging pull requests. +- Make decisions about the Services that are run by The Project and manage + those Services for the benefit of the Project and Community. +- Make decisions when regular community discussion doesn't produce consensus + on an issue in a reasonable time frame. + +### Core Team membership + +To become eligible for being a Core Team Member an individual must be a Project +Contributor who has produced contributions that are substantial in quality and +quantity, and sustained over at least one year. Potential Core Team Members are +nominated by existing Core members and voted upon by the existing Core Team +after asking if the potential Member is interested and willing to serve in that +capacity. The Core Team will be initially formed from the set of existing +Contributors who have been granted commit rights as of late 2015. + +When considering potential Members, the Core Team will look at candidates with +a comprehensive view of their contributions. This will include but is not +limited to code, code review, infrastructure work, mailing list and chat +participation, community help/building, education and outreach, design work, +etc. We are deliberately not setting arbitrary quantitative metrics (like “100 +commits in this repo”) to avoid encouraging behavior that plays to the metrics +rather than the project’s overall well-being. We want to encourage a diverse +array of backgrounds, viewpoints and talents in our team, which is why we +explicitly do not define code as the sole metric on which Core Team membership +will be evaluated. + +If a Core Team member becomes inactive in the project for a period of one year, +they will be considered for removal from the Core Team. Before removal, +inactive Member will be approached by the BDFL to see if they plan on returning +to active participation. If not they will be removed immediately upon a Core +Team vote. If they plan on returning to active participation soon, they will be +given a grace period of one year. If they don't return to active participation +within that time period they will be removed by vote of the Core Team without +further grace period. All former Core Team members can be considered for +membership again at any time in the future, like any other Project Contributor. +Retired Core Team members will be listed on the project website, acknowledging +the period during which they were active in the Core Team. + +The Core Team reserves the right to eject current Members, other than the BDFL, +if they are deemed to be actively harmful to the project’s well-being, and +attempts at communication and conflict resolution have failed. + +### Conflict of interest + +It is expected that the BDFL and Core Team Members will be employed at a wide +range of companies, universities and non-profit organizations. 
Because of this,
+it is possible that Members will have conflicts of interest. Such conflicts of
+interest include, but are not limited to:
+
+- Financial interests, such as investments, employment or contracting work,
+  outside of The Project that may influence their work on The Project.
+- Access to proprietary information of their employer that could potentially
+  leak into their work with the Project.
+
+All members of the Core Team, BDFL included, shall disclose to the rest of the
+Core Team any conflict of interest they may have. Members with a conflict of
+interest in a particular issue may participate in Core Team discussions on that
+issue, but must recuse themselves from voting on the issue. If the BDFL has
+recused himself/herself for a particular decision, they will appoint a
+substitute BDFL for that decision.
+
+### Private communications of the Core Team
+
+Unless specifically required, all Core Team discussions and activities will be
+public and done in collaboration and discussion with the Project Contributors
+and Community. The Core Team will have a private mailing list that will be used
+sparingly and only when a specific matter requires privacy. When private
+communications and decisions are needed, the Core Team will do its best to
+summarize those to the Community after eliding personal/private/sensitive
+information that should not be posted to the public internet.
+
+### Subcommittees
+
+The Core Team can create subcommittees that provide leadership and guidance for
+specific aspects of the project. Like the Core Team as a whole, subcommittees
+should conduct their business in an open and public manner unless privacy is
+specifically called for. Private subcommittee communications should happen on
+the main private mailing list of the Core Team unless privacy is specifically
+called for.
+
+If the BDFL is not a member of a subcommittee, the BDFL still retains
+override authority. In practice, however, the BDFL should appoint a delegate
+to play that role most of the time, with explicit BDFL intervention sought
+only if the subcommittee disagrees with the delegate’s decision and no
+resolution is possible within the team. This differs from a BDFL delegate for
+a specific decision (or a recusal situation), where the BDFL gives up his/her
+authority to someone else in full; it is closer to the “lieutenants” model
+used by Linus Torvalds.
+
+### NumFOCUS Subcommittee
+
+The Core Team will maintain one narrowly focused subcommittee to manage its
+interactions with NumFOCUS.
+
+- The NumFOCUS Subcommittee is composed of at least 5 persons who manage
+  project funding that comes through NumFOCUS. It is expected that these funds
+  will be spent in a manner that is consistent with the non-profit mission of
+  NumFOCUS and the direction of the Project as determined by the full Core
+  Team.
+- This Subcommittee shall NOT make decisions about the direction, scope or
+  technical direction of the Project.
+- No more than 2 Subcommittee Members may report to one person (either
+  directly or indirectly) through employment or contracting work, counting
+  the reportee (i.e. the reportee + 1 is the maximum). This avoids effective
+  majorities resting on one person.
+
+## Institutional Partners and Funding
+
+The BDFL and Core Team are the primary leadership for the project.
No outside +institution, individual or legal entity has the ability to own, control, usurp +or influence the project other than by participating in the Project as +Contributors and Core Team. However, because institutions are the primary +funding mechanism for the project, it is important to formally acknowledge +institutional participation in the project. These are Institutional Partners. + +An Institutional Contributor is any individual Project Contributor who +contributes to the project as part of their official duties at an Institutional +Partner. Likewise, an Institutional Core Team Member is any Core Team Member +who contributes to the project as part of their official duties at an +Institutional Partner. + +With these definitions, an Institutional Partner is any recognized legal entity +in the United States or elsewhere that employs at least one Institutional +Contributor or Institutional Core Team Member. Institutional Partners can be +for-profit or non-profit entities. + +Institutions become eligible to become an Institutional Partner by employing +individuals who actively contribute to The Project as part of their official +duties. To state this another way, the only way for an Institutional Partner to +influence the project is by actively contributing to the open development of +the project, on equal terms with any other member of the community of +Contributors and Core Team Members. Merely using pandas Software or Services in +an institutional context does not allow an entity to become an Institutional +Partner. Financial gifts do not enable an entity to become an Institutional +Partner. Once an institution becomes eligible for Institutional Partnership, +the Core Team must nominate and approve the Partnership. + +If an existing Institutional Partner no longer has a contributing employee, +they will be given a one-year grace period for other employees to begin +contributing. + +An Institutional Partner is free to pursue funding for their work on The +Project through any legal means. This could involve a non-profit organization +raising money from private foundations and donors or a for-profit company +building proprietary products and services that leverage Project Software and +Services. Funding acquired by Institutional Partners to work on The Project is +called Institutional Funding. However, no funding obtained by an Institutional +Partner can override The Project BDFL and Core Team. If a Partner has funding +to do pandas work and the Core Team decides to not pursue that work as a +project, the Partner is free to pursue it on their own. However in this +situation, that part of the Partner’s work will not be under the pandas +umbrella and cannot use the Project trademarks in a way that suggests a formal +relationship. + +To acknowledge institutional contributions, there are two levels of +Institutional Partners, with associated benefits: + +**Tier 1** = an institution with at least one Institutional Core Team Member + +- Acknowledged on the pandas website, in talks and T-shirts. +- Ability to acknowledge their own funding sources on the pandas website, in + talks and T-shirts. +- Ability to influence the project through the participation of their Core Team + Member. + +**Tier 2** = an institution with at least one Institutional Contributor + +## Breach + +Non-compliance with the terms of the governance documents shall be reported to +the Core Team either through public or private channels as deemed appropriate. 
+ +## Changing the Governance + +Changes to the governance are submitted via a GitHub pull request to The Project's +[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md). +The pull request is then refined in response to public comment and review, with +the goal being consensus in the community. After this open period, a Core Team +Member proposes to the Core Team that the changes be ratified and the pull +request merged (accepting the proposed changes) or proposes that the pull +request be closed without merging (rejecting the proposed changes). The Member +should state the final commit hash in the pull request being proposed for +acceptance or rejection and briefly summarize the pull request. A minimum of +80% of the Core Team must vote and at least 2/3 of the votes must be positive +to carry out the proposed action (fractions of a vote rounded up to the nearest +integer). Since the BDFL holds ultimate authority in The Project, the BDFL has +authority to act alone in accepting or rejecting changes or overriding Core +Team decisions. From 2cd87223abe3739438356142127a4796c01267fb Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:15 -0500 Subject: [PATCH 040/184] New translations governance.md (Polish) --- web/pandas/pl/about/governance.md | 317 ++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 web/pandas/pl/about/governance.md diff --git a/web/pandas/pl/about/governance.md b/web/pandas/pl/about/governance.md new file mode 100644 index 000000000..b37925e89 --- /dev/null +++ b/web/pandas/pl/about/governance.md @@ -0,0 +1,317 @@ +# Project governance + +The official version of this document, along with a list of +individuals and institutions in the roles defined in the governance +section below, is contained in the +[Project governance]({{ base_url }}about/governance.html) +page of the pandas website. + +## The Project + +The pandas Project (The Project) is an open source software project affiliated +with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open +source software for data ingest, data preparation, data analysis, and data +visualization for the Python programming language. The Software developed by +The Project is released under the BSD (or similar) open source license, +developed openly and hosted in public GitHub repositories under the pandas +GitHub organization. Examples of Project Software +include the main pandas code repository and the pandas-stubs library. + +Through its affiliation with NumFOCUS, The Project has the right to receive +tax-deductible donations in the United States of America. + +The Project is developed by a team of distributed developers, called +Contributors. Contributors are individuals who have contributed code, +documentation, designs or other work to one or more Project repositories. +Anyone can be a Contributor. Contributors can be affiliated with any legal +entity or none. Contributors participate in the project by submitting, +reviewing and discussing GitHub Pull Requests and Issues and participating in +open and public Project discussions on GitHub, mailing lists, and +elsewhere. The foundation of Project participation is openness and +transparency. 
+ +Here is a list of the current Contributors to the main pandas repository: + +[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors) + +There are also many other Contributors listed in the logs of other repositories of +the pandas project. + +The Project Community consists of all Contributors and Users of the Project. +Contributors work on behalf of and are responsible to the larger Project +Community and we strive to keep the barrier between Contributors and Users as +low as possible. + +The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation +([https://numfocus.org](https://numfocus.org)), which serves as its fiscal +sponsor, may hold project trademarks and other intellectual property, helps +manage project donations and acts as a parent legal entity. NumFOCUS is the +only legal entity that has a formal relationship with the project (see +Institutional Partners section below). + +## Governance + +This section describes the governance and leadership model of The Project. + +The foundations of Project governance are: + +- Openness & Transparency +- Active Contribution +- Institutional Neutrality + +Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and +subset of Contributors, called the Core Team, whose active and consistent +contributions have been recognized by their receiving “commit rights” to the +Project GitHub repositories. In general all Project decisions are made through +consensus among the Core Team with input from the Community. The BDFL can, but +rarely chooses to, override the Core Team and make a final decision on a +matter. + +While this approach has served us well, as the Project grows and faces more +legal and financial decisions and interacts with other institutions, we see a +need for a more formal governance model. Moving forward The Project leadership +will consist of a BDFL and Core Team. We view this governance model as the +formalization of what we are already doing, rather than a change in direction. + +### BDFL + +The Project will have a BDFL (Benevolent Dictator for Life), who is currently +Wes McKinney. As Dictator, the BDFL has the authority to make all final +decisions for The Project. As Benevolent, the BDFL, in practice chooses to +defer that authority to the consensus of the community discussion channels and +the Core Team. It is expected, and in the past has been the case, that the BDFL +will only rarely assert his/her final authority. Because it is rarely used, we +refer to BDFL’s final authority as a “special” or “overriding” vote. When it +does occur, the BDFL override typically happens in situations where there is a +deadlock in the Core Team or if the Core Team ask the BDFL to make a decision +on a specific matter. To ensure the benevolence of the BDFL, The Project +encourages others to fork the project if they disagree with the overall +direction the BDFL is taking. The BDFL is chair of the Core Team (see below) +and may delegate his/her authority on a particular decision or set of decisions +to any other Core Team Member at his/her discretion. + +The BDFL can appoint his/her successor, but it is expected that the Core Team +would be consulted on this decision. If the BDFL is unable to appoint a +successor (e.g. due to death or illness), the Core Team will choose a successor +by voting with at least 2/3 of the Core Team members voting in favor of the +chosen successor. At least 80% of the Core Team must participate in the +vote. 
If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core +Team members shall propose the BDFL candidates to the Main NumFOCUS board, who +will then make the final decision. + +### Core Team + +The Project's Core Team will consist of Project Contributors who have produced +contributions that are substantial in quality and quantity, and sustained over +at least one year. The overall role of the Core Team is to ensure, through +working with the BDFL and taking input from the Community, the long-term +well-being of the project, both technically and as a community. + +During the everyday project activities, Core Team participate in all +discussions, code review and other project activities as peers with all other +Contributors and the Community. In these everyday activities, Core Team do not +have any special power or privilege through their membership on the Core +Team. However, it is expected that because of the quality and quantity of their +contributions and their expert knowledge of the Project Software that the Core +Team will provide useful guidance, both technical and in terms of project +direction, to potentially less experienced contributors. + +The Core Team and its Members play a special role in certain situations. +In particular, the Core Team may: + +- Make decisions about the overall scope, vision and direction of the + project. +- Make decisions about strategic collaborations with other organizations or + individuals. +- Make decisions about specific technical issues, features, bugs and pull + requests. They are the primary mechanism of guiding the code review process + and merging pull requests. +- Make decisions about the Services that are run by The Project and manage + those Services for the benefit of the Project and Community. +- Make decisions when regular community discussion doesn't produce consensus + on an issue in a reasonable time frame. + +### Core Team membership + +To become eligible for being a Core Team Member an individual must be a Project +Contributor who has produced contributions that are substantial in quality and +quantity, and sustained over at least one year. Potential Core Team Members are +nominated by existing Core members and voted upon by the existing Core Team +after asking if the potential Member is interested and willing to serve in that +capacity. The Core Team will be initially formed from the set of existing +Contributors who have been granted commit rights as of late 2015. + +When considering potential Members, the Core Team will look at candidates with +a comprehensive view of their contributions. This will include but is not +limited to code, code review, infrastructure work, mailing list and chat +participation, community help/building, education and outreach, design work, +etc. We are deliberately not setting arbitrary quantitative metrics (like “100 +commits in this repo”) to avoid encouraging behavior that plays to the metrics +rather than the project’s overall well-being. We want to encourage a diverse +array of backgrounds, viewpoints and talents in our team, which is why we +explicitly do not define code as the sole metric on which Core Team membership +will be evaluated. + +If a Core Team member becomes inactive in the project for a period of one year, +they will be considered for removal from the Core Team. Before removal, +inactive Member will be approached by the BDFL to see if they plan on returning +to active participation. If not they will be removed immediately upon a Core +Team vote. 
If they plan on returning to active participation soon, they will be
+given a grace period of one year. If they don't return to active participation
+within that time period, they will be removed by a vote of the Core Team
+without further grace period. All former Core Team members can be considered
+for membership again at any time in the future, like any other Project
+Contributor. Retired Core Team members will be listed on the project website,
+acknowledging the period during which they were active in the Core Team.
+
+The Core Team reserves the right to eject current Members, other than the BDFL,
+if they are deemed to be actively harmful to the project’s well-being, and
+attempts at communication and conflict resolution have failed.
+
+### Conflict of interest
+
+It is expected that the BDFL and Core Team Members will be employed at a wide
+range of companies, universities and non-profit organizations. Because of this,
+it is possible that Members will have conflicts of interest. Such conflicts of
+interest include, but are not limited to:
+
+- Financial interests, such as investments, employment or contracting work,
+  outside of The Project that may influence their work on The Project.
+- Access to proprietary information of their employer that could potentially
+  leak into their work with the Project.
+
+All members of the Core Team, BDFL included, shall disclose to the rest of the
+Core Team any conflict of interest they may have. Members with a conflict of
+interest in a particular issue may participate in Core Team discussions on that
+issue, but must recuse themselves from voting on the issue. If the BDFL has
+recused himself/herself for a particular decision, they will appoint a
+substitute BDFL for that decision.
+
+### Private communications of the Core Team
+
+Unless specifically required, all Core Team discussions and activities will be
+public and done in collaboration and discussion with the Project Contributors
+and Community. The Core Team will have a private mailing list that will be used
+sparingly and only when a specific matter requires privacy. When private
+communications and decisions are needed, the Core Team will do its best to
+summarize those to the Community after eliding personal/private/sensitive
+information that should not be posted to the public internet.
+
+### Subcommittees
+
+The Core Team can create subcommittees that provide leadership and guidance for
+specific aspects of the project. Like the Core Team as a whole, subcommittees
+should conduct their business in an open and public manner unless privacy is
+specifically called for. Private subcommittee communications should happen on
+the main private mailing list of the Core Team unless privacy is specifically
+called for.
+
+If the BDFL is not a member of a subcommittee, the BDFL still retains
+override authority. In practice, however, the BDFL should appoint a delegate
+to play that role most of the time, with explicit BDFL intervention sought
+only if the subcommittee disagrees with the delegate’s decision and no
+resolution is possible within the team. This differs from a BDFL delegate for
+a specific decision (or a recusal situation), where the BDFL gives up his/her
+authority to someone else in full; it is closer to the “lieutenants” model
+used by Linus Torvalds.
+
+### NumFOCUS Subcommittee
+
+The Core Team will maintain one narrowly focused subcommittee to manage its
+interactions with NumFOCUS.
+ +- The NumFOCUS Subcommittee is comprised of at least 5 persons who manage + project funding that comes through NumFOCUS. It is expected that these funds + will be spent in a manner that is consistent with the non-profit mission of + NumFOCUS and the direction of the Project as determined by the full Core + Team. +- This Subcommittee shall NOT make decisions about the direction, scope or + technical direction of the Project. +- This Subcommittee will have at least 5 members. No more than 2 Subcommittee + Members can report to one person (either directly or indirectly) through + employment or contracting work (including the reportee, i.e. the reportee + 1 + is the max). This avoids effective majorities resting on one person. + +## Institutional Partners and Funding + +The BDFL and Core Team are the primary leadership for the project. No outside +institution, individual or legal entity has the ability to own, control, usurp +or influence the project other than by participating in the Project as +Contributors and Core Team. However, because institutions are the primary +funding mechanism for the project, it is important to formally acknowledge +institutional participation in the project. These are Institutional Partners. + +An Institutional Contributor is any individual Project Contributor who +contributes to the project as part of their official duties at an Institutional +Partner. Likewise, an Institutional Core Team Member is any Core Team Member +who contributes to the project as part of their official duties at an +Institutional Partner. + +With these definitions, an Institutional Partner is any recognized legal entity +in the United States or elsewhere that employs at least one Institutional +Contributor or Institutional Core Team Member. Institutional Partners can be +for-profit or non-profit entities. + +Institutions become eligible to become an Institutional Partner by employing +individuals who actively contribute to The Project as part of their official +duties. To state this another way, the only way for an Institutional Partner to +influence the project is by actively contributing to the open development of +the project, on equal terms with any other member of the community of +Contributors and Core Team Members. Merely using pandas Software or Services in +an institutional context does not allow an entity to become an Institutional +Partner. Financial gifts do not enable an entity to become an Institutional +Partner. Once an institution becomes eligible for Institutional Partnership, +the Core Team must nominate and approve the Partnership. + +If an existing Institutional Partner no longer has a contributing employee, +they will be given a one-year grace period for other employees to begin +contributing. + +An Institutional Partner is free to pursue funding for their work on The +Project through any legal means. This could involve a non-profit organization +raising money from private foundations and donors or a for-profit company +building proprietary products and services that leverage Project Software and +Services. Funding acquired by Institutional Partners to work on The Project is +called Institutional Funding. However, no funding obtained by an Institutional +Partner can override The Project BDFL and Core Team. If a Partner has funding +to do pandas work and the Core Team decides to not pursue that work as a +project, the Partner is free to pursue it on their own. 
However in this +situation, that part of the Partner’s work will not be under the pandas +umbrella and cannot use the Project trademarks in a way that suggests a formal +relationship. + +To acknowledge institutional contributions, there are two levels of +Institutional Partners, with associated benefits: + +**Tier 1** = an institution with at least one Institutional Core Team Member + +- Acknowledged on the pandas website, in talks and T-shirts. +- Ability to acknowledge their own funding sources on the pandas website, in + talks and T-shirts. +- Ability to influence the project through the participation of their Core Team + Member. + +**Tier 2** = an institution with at least one Institutional Contributor + +## Breach + +Non-compliance with the terms of the governance documents shall be reported to +the Core Team either through public or private channels as deemed appropriate. + +## Changing the Governance + +Changes to the governance are submitted via a GitHub pull request to The Project's +[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md). +The pull request is then refined in response to public comment and review, with +the goal being consensus in the community. After this open period, a Core Team +Member proposes to the Core Team that the changes be ratified and the pull +request merged (accepting the proposed changes) or proposes that the pull +request be closed without merging (rejecting the proposed changes). The Member +should state the final commit hash in the pull request being proposed for +acceptance or rejection and briefly summarize the pull request. A minimum of +80% of the Core Team must vote and at least 2/3 of the votes must be positive +to carry out the proposed action (fractions of a vote rounded up to the nearest +integer). Since the BDFL holds ultimate authority in The Project, the BDFL has +authority to act alone in accepting or rejecting changes or overriding Core +Team decisions. From 3a3ef339a96892cd283254e3a25d2b968e7e74db Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:17 -0500 Subject: [PATCH 041/184] New translations governance.md (Russian) --- web/pandas/ru/about/governance.md | 317 ++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 web/pandas/ru/about/governance.md diff --git a/web/pandas/ru/about/governance.md b/web/pandas/ru/about/governance.md new file mode 100644 index 000000000..b37925e89 --- /dev/null +++ b/web/pandas/ru/about/governance.md @@ -0,0 +1,317 @@ +# Project governance + +The official version of this document, along with a list of +individuals and institutions in the roles defined in the governance +section below, is contained in the +[Project governance]({{ base_url }}about/governance.html) +page of the pandas website. + +## The Project + +The pandas Project (The Project) is an open source software project affiliated +with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open +source software for data ingest, data preparation, data analysis, and data +visualization for the Python programming language. The Software developed by +The Project is released under the BSD (or similar) open source license, +developed openly and hosted in public GitHub repositories under the pandas +GitHub organization. Examples of Project Software +include the main pandas code repository and the pandas-stubs library. 
+ +Through its affiliation with NumFOCUS, The Project has the right to receive +tax-deductible donations in the United States of America. + +The Project is developed by a team of distributed developers, called +Contributors. Contributors are individuals who have contributed code, +documentation, designs or other work to one or more Project repositories. +Anyone can be a Contributor. Contributors can be affiliated with any legal +entity or none. Contributors participate in the project by submitting, +reviewing and discussing GitHub Pull Requests and Issues and participating in +open and public Project discussions on GitHub, mailing lists, and +elsewhere. The foundation of Project participation is openness and +transparency. + +Here is a list of the current Contributors to the main pandas repository: + +[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors) + +There are also many other Contributors listed in the logs of other repositories of +the pandas project. + +The Project Community consists of all Contributors and Users of the Project. +Contributors work on behalf of and are responsible to the larger Project +Community and we strive to keep the barrier between Contributors and Users as +low as possible. + +The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation +([https://numfocus.org](https://numfocus.org)), which serves as its fiscal +sponsor, may hold project trademarks and other intellectual property, helps +manage project donations and acts as a parent legal entity. NumFOCUS is the +only legal entity that has a formal relationship with the project (see +Institutional Partners section below). + +## Governance + +This section describes the governance and leadership model of The Project. + +The foundations of Project governance are: + +- Openness & Transparency +- Active Contribution +- Institutional Neutrality + +Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and +subset of Contributors, called the Core Team, whose active and consistent +contributions have been recognized by their receiving “commit rights” to the +Project GitHub repositories. In general all Project decisions are made through +consensus among the Core Team with input from the Community. The BDFL can, but +rarely chooses to, override the Core Team and make a final decision on a +matter. + +While this approach has served us well, as the Project grows and faces more +legal and financial decisions and interacts with other institutions, we see a +need for a more formal governance model. Moving forward The Project leadership +will consist of a BDFL and Core Team. We view this governance model as the +formalization of what we are already doing, rather than a change in direction. + +### BDFL + +The Project will have a BDFL (Benevolent Dictator for Life), who is currently +Wes McKinney. As Dictator, the BDFL has the authority to make all final +decisions for The Project. As Benevolent, the BDFL, in practice chooses to +defer that authority to the consensus of the community discussion channels and +the Core Team. It is expected, and in the past has been the case, that the BDFL +will only rarely assert his/her final authority. Because it is rarely used, we +refer to BDFL’s final authority as a “special” or “overriding” vote. When it +does occur, the BDFL override typically happens in situations where there is a +deadlock in the Core Team or if the Core Team ask the BDFL to make a decision +on a specific matter. 
To ensure the benevolence of the BDFL, The Project
encourages others to fork the project if they disagree with the overall
direction the BDFL is taking. The BDFL is chair of the Core Team (see below)
and may delegate his/her authority on a particular decision or set of decisions
to any other Core Team Member at his/her discretion.

The BDFL can appoint his/her successor, but it is expected that the Core Team
would be consulted on this decision. If the BDFL is unable to appoint a
successor (e.g. due to death or illness), the Core Team will choose a successor
by a vote in which at least 2/3 of the Core Team members vote in favor of the
chosen successor. At least 80% of the Core Team must participate in the
vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core
Team members shall propose the BDFL candidates to the Main NumFOCUS board,
which will then make the final decision.

### Core Team

The Project's Core Team will consist of Project Contributors who have produced
contributions that are substantial in quality and quantity, and sustained over
at least one year. The overall role of the Core Team is to ensure, through
working with the BDFL and taking input from the Community, the long-term
well-being of the project, both technically and as a community.

During the everyday project activities, Core Team Members participate in all
discussions, code review and other project activities as peers with all other
Contributors and the Community. In these everyday activities, Core Team Members
do not have any special power or privilege through their membership on the Core
Team. However, it is expected that, because of the quality and quantity of
their contributions and their expert knowledge of the Project Software, the
Core Team will provide useful guidance, both technical and in terms of project
direction, to potentially less experienced contributors.

The Core Team and its Members play a special role in certain situations.
In particular, the Core Team may:

- Make decisions about the overall scope, vision and direction of the
  project.
- Make decisions about strategic collaborations with other organizations or
  individuals.
- Make decisions about specific technical issues, features, bugs and pull
  requests. They are the primary mechanism of guiding the code review process
  and merging pull requests.
- Make decisions about the Services that are run by The Project and manage
  those Services for the benefit of the Project and Community.
- Make decisions when regular community discussion doesn't produce consensus
  on an issue in a reasonable time frame.

### Core Team membership

To become eligible to be a Core Team Member, an individual must be a Project
Contributor who has produced contributions that are substantial in quality and
quantity, and sustained over at least one year. Potential Core Team Members are
nominated by existing Core Team Members and voted upon by the existing Core
Team after asking if the potential Member is interested and willing to serve in
that capacity. The Core Team will be initially formed from the set of existing
Contributors who have been granted commit rights as of late 2015.

When considering potential Members, the Core Team will look at candidates with
a comprehensive view of their contributions. This will include, but is not
limited to, code, code review, infrastructure work, mailing list and chat
participation, community help/building, education and outreach, design work,
etc.
We are deliberately not setting arbitrary quantitative metrics (like “100
commits in this repo”) to avoid encouraging behavior that plays to the metrics
rather than the project’s overall well-being. We want to encourage a diverse
array of backgrounds, viewpoints and talents in our team, which is why we
explicitly do not define code as the sole metric on which Core Team membership
will be evaluated.

If a Core Team member becomes inactive in the project for a period of one year,
they will be considered for removal from the Core Team. Before removal, the
inactive Member will be approached by the BDFL to see if they plan on returning
to active participation. If not, they will be removed immediately upon a Core
Team vote. If they plan on returning to active participation soon, they will be
given a grace period of one year. If they don't return to active participation
within that time period, they will be removed by vote of the Core Team without
a further grace period. All former Core Team members can be considered for
membership again at any time in the future, like any other Project Contributor.
Retired Core Team members will be listed on the project website, acknowledging
the period during which they were active in the Core Team.

The Core Team reserves the right to eject current Members, other than the BDFL,
if they are deemed to be actively harmful to the project’s well-being, and
attempts at communication and conflict resolution have failed.

### Conflict of interest

It is expected that the BDFL and Core Team Members will be employed at a wide
range of companies, universities and non-profit organizations. Because of this,
it is possible that Members will have conflicts of interest. Such conflicts of
interest include, but are not limited to:

- Financial interests, such as investments, employment or contracting work,
  outside of The Project that may influence their work on The Project.
- Access to proprietary information of their employer that could potentially
  leak into their work with the Project.

All members of the Core Team, BDFL included, shall disclose to the rest of the
Core Team any conflict of interest they may have. Members with a conflict of
interest in a particular issue may participate in Core Team discussions on that
issue, but must recuse themselves from voting on the issue. If the BDFL has
recused himself/herself for a particular decision, they will appoint a
substitute BDFL for that decision.

### Private communications of the Core Team

Unless specifically required, all Core Team discussions and activities will be
public and done in collaboration and discussion with the Project Contributors
and Community. The Core Team will have a private mailing list that will be used
sparingly and only when a specific matter requires privacy. When private
communications and decisions are needed, the Core Team will do its best to
summarize those to the Community after eliding personal/private/sensitive
information that should not be posted to the public internet.

### Subcommittees

The Core Team can create subcommittees that provide leadership and guidance for
specific aspects of the project. Like the Core Team as a whole, subcommittees
should conduct their business in an open and public manner unless privacy is
specifically called for. Private subcommittee communications should happen on
the main private mailing list of the Core Team unless specifically called for.

Question: if the BDFL is not on a subcommittee, do they still have override
authority?

Suggestion: they do, but they should appoint a delegate who plays that role
most of the time, and explicit BDFL intervention is sought only if the
committee disagrees with that delegate’s decision and no resolution is possible
within the team. This is different from a BDFL delegate for a specific decision
(or a recusal situation), where the BDFL is literally giving up his/her
authority to someone else in full. It’s more like what Linus Torvalds uses with his
“lieutenants” model.

### NumFOCUS Subcommittee

The Core Team will maintain one narrowly focused subcommittee to manage its
interactions with NumFOCUS.

- The NumFOCUS Subcommittee is composed of at least 5 people who manage
  project funding that comes through NumFOCUS. It is expected that these funds
  will be spent in a manner that is consistent with the non-profit mission of
  NumFOCUS and the direction of the Project as determined by the full Core
  Team.
- This Subcommittee shall NOT make decisions about the direction, scope or
  technical direction of the Project.
- This Subcommittee will have at least 5 members. No more than 2 Subcommittee
  Members can report to one person (either directly or indirectly) through
  employment or contracting work (including the reportee, i.e. the reportee + 1
  is the max). This avoids effective majorities resting on one person (a small
  sketch of this check appears below).

## Institutional Partners and Funding

The BDFL and Core Team are the primary leadership for the project. No outside
institution, individual or legal entity has the ability to own, control, usurp
or influence the project other than by participating in the Project as
Contributors and Core Team. However, because institutions are the primary
funding mechanism for the project, it is important to formally acknowledge
institutional participation in the project. These are Institutional Partners.

An Institutional Contributor is any individual Project Contributor who
contributes to the project as part of their official duties at an Institutional
Partner. Likewise, an Institutional Core Team Member is any Core Team Member
who contributes to the project as part of their official duties at an
Institutional Partner.

With these definitions, an Institutional Partner is any recognized legal entity
in the United States or elsewhere that employs at least one Institutional
Contributor or Institutional Core Team Member. Institutional Partners can be
for-profit or non-profit entities.

Institutions become eligible to become Institutional Partners by employing
individuals who actively contribute to The Project as part of their official
duties. To state this another way, the only way for an Institutional Partner to
influence the project is by actively contributing to the open development of
the project, on equal terms with any other member of the community of
Contributors and Core Team Members. Merely using pandas Software or Services in
an institutional context does not allow an entity to become an Institutional
Partner. Financial gifts do not enable an entity to become an Institutional
Partner. Once an institution becomes eligible for Institutional Partnership,
the Core Team must nominate and approve the Partnership.

If an existing Institutional Partner no longer has a contributing employee,
they will be given a one-year grace period for other employees to begin
contributing.
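
To make the NumFOCUS Subcommittee reporting limit above concrete, here is a
minimal sketch of the check, under the assumption that each member's full
management chain is known; the function name and input structures are
hypothetical illustrations, not part of the governance document:

```python
from collections import Counter

def reporting_limit_violations(members, reports_to):
    """Flag anyone who accounts for more than 2 Subcommittee Members.

    `members` lists Subcommittee Members; `reports_to[m]` is the chain of
    people member `m` reports to, directly or indirectly (both inputs are
    illustrative, not prescribed by the governance document).
    """
    counts = Counter()
    for member in members:
        counts[member] += 1  # "including the reportee"
        for boss in reports_to.get(member, ()):
            counts[boss] += 1
    # "the reportee + 1 is the max": at most 2 Members may trace to one person.
    return {person: n for person, n in counts.items() if n > 2}

# Two Members reporting to a third Member puts three Members in one person's
# reporting line, which the rule forbids.
assert reporting_limit_violations(
    ["ana", "bo", "cy", "di", "el"],
    {"bo": ["ana"], "cy": ["ana"]},
) == {"ana": 3}
```

The design choice is to count the person themselves together with everyone who
reports up to them, so both a member-manager and an outside manager are caught
by the same threshold.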

An Institutional Partner is free to pursue funding for their work on The
Project through any legal means. This could involve a non-profit organization
raising money from private foundations and donors or a for-profit company
building proprietary products and services that leverage Project Software and
Services. Funding acquired by Institutional Partners to work on The Project is
called Institutional Funding. However, no funding obtained by an Institutional
Partner can override The Project BDFL and Core Team. If a Partner has funding
to do pandas work and the Core Team decides not to pursue that work as a
project, the Partner is free to pursue it on their own. However, in this
situation, that part of the Partner’s work will not be under the pandas
umbrella and cannot use the Project trademarks in a way that suggests a formal
relationship.

To acknowledge institutional contributions, there are two levels of
Institutional Partners, with associated benefits:

**Tier 1** = an institution with at least one Institutional Core Team Member

- Acknowledged on the pandas website, in talks and T-shirts.
- Ability to acknowledge their own funding sources on the pandas website, in
  talks and T-shirts.
- Ability to influence the project through the participation of their Core Team
  Member.

**Tier 2** = an institution with at least one Institutional Contributor

## Breach

Non-compliance with the terms of the governance documents shall be reported to
the Core Team either through public or private channels as deemed appropriate.

## Changing the Governance

Changes to the governance are submitted via a GitHub pull request to The Project's
[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md).
The pull request is then refined in response to public comment and review, with
the goal being consensus in the community. After this open period, a Core Team
Member proposes to the Core Team that the changes be ratified and the pull
request merged (accepting the proposed changes) or proposes that the pull
request be closed without merging (rejecting the proposed changes). The Member
should state the final commit hash in the pull request being proposed for
acceptance or rejection and briefly summarize the pull request. A minimum of
80% of the Core Team must vote and at least 2/3 of the votes must be positive
to carry out the proposed action (fractions of a vote rounded up to the nearest
integer). Since the BDFL holds ultimate authority in The Project, the BDFL has
authority to act alone in accepting or rejecting changes or overriding Core
Team decisions.

From 87cfd32197e9a2cf1c04f2faa426806cc7fcf0fc Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:18 -0500
Subject: [PATCH 042/184] New translations governance.md (Chinese Simplified)

---
 web/pandas/zh/about/governance.md | 317 ++++++++++++++++++++++++++++++
 1 file changed, 317 insertions(+)
 create mode 100644 web/pandas/zh/about/governance.md

diff --git a/web/pandas/zh/about/governance.md b/web/pandas/zh/about/governance.md
new file mode 100644
index 000000000..b37925e89
--- /dev/null
+++ b/web/pandas/zh/about/governance.md
@@ -0,0 +1,317 @@
# Project governance

The official version of this document, along with a list of
individuals and institutions in the roles defined in the governance
section below, is contained in the
[Project governance]({{ base_url }}about/governance.html)
page of the pandas website.

## The Project

The pandas Project (The Project) is an open source software project affiliated
with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open
source software for data ingest, data preparation, data analysis, and data
visualization for the Python programming language. The Software developed by
The Project is released under the BSD (or similar) open source license,
developed openly and hosted in public GitHub repositories under the pandas
GitHub organization. Examples of Project Software
include the main pandas code repository and the pandas-stubs library.

Through its affiliation with NumFOCUS, The Project has the right to receive
tax-deductible donations in the United States of America.

The Project is developed by a team of distributed developers, called
Contributors. Contributors are individuals who have contributed code,
documentation, designs or other work to one or more Project repositories.
Anyone can be a Contributor. Contributors can be affiliated with any legal
entity or none. Contributors participate in the project by submitting,
reviewing and discussing GitHub Pull Requests and Issues and participating in
open and public Project discussions on GitHub, mailing lists, and
elsewhere. The foundation of Project participation is openness and
transparency.

Here is a list of the current Contributors to the main pandas repository:

[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors)

There are also many other Contributors listed in the logs of other repositories of
the pandas project.

The Project Community consists of all Contributors and Users of the Project.
Contributors work on behalf of and are responsible to the larger Project
Community and we strive to keep the barrier between Contributors and Users as
low as possible.

The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation
([https://numfocus.org](https://numfocus.org)), which serves as its fiscal
sponsor, may hold project trademarks and other intellectual property, helps
manage project donations and acts as a parent legal entity. NumFOCUS is the
only legal entity that has a formal relationship with the project (see
Institutional Partners section below).

## Governance

This section describes the governance and leadership model of The Project.

The foundations of Project governance are:

- Openness & Transparency
- Active Contribution
- Institutional Neutrality

Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and a
subset of Contributors, called the Core Team, whose active and consistent
contributions have been recognized by their receiving “commit rights” to the
Project GitHub repositories. In general, all Project decisions are made through
consensus among the Core Team with input from the Community. The BDFL can, but
rarely chooses to, override the Core Team and make a final decision on a
matter.

While this approach has served us well, as the Project grows and faces more
legal and financial decisions and interacts with other institutions, we see a
need for a more formal governance model. Moving forward, The Project leadership
will consist of a BDFL and Core Team. We view this governance model as the
formalization of what we are already doing, rather than a change in direction.

### BDFL

The Project will have a BDFL (Benevolent Dictator for Life), who is currently
Wes McKinney.
As Dictator, the BDFL has the authority to make all final
decisions for The Project. As Benevolent, the BDFL in practice chooses to
defer that authority to the consensus of the community discussion channels and
the Core Team. It is expected, and in the past has been the case, that the BDFL
will only rarely assert his/her final authority. Because it is rarely used, we
refer to the BDFL’s final authority as a “special” or “overriding” vote. When it
does occur, the BDFL override typically happens in situations where there is a
deadlock in the Core Team or if the Core Team asks the BDFL to make a decision
on a specific matter. To ensure the benevolence of the BDFL, The Project
encourages others to fork the project if they disagree with the overall
direction the BDFL is taking. The BDFL is chair of the Core Team (see below)
and may delegate his/her authority on a particular decision or set of decisions
to any other Core Team Member at his/her discretion.

The BDFL can appoint his/her successor, but it is expected that the Core Team
would be consulted on this decision. If the BDFL is unable to appoint a
successor (e.g. due to death or illness), the Core Team will choose a successor
by a vote in which at least 2/3 of the Core Team members vote in favor of the
chosen successor. At least 80% of the Core Team must participate in the
vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core
Team members shall propose the BDFL candidates to the Main NumFOCUS board,
which will then make the final decision.

### Core Team

The Project's Core Team will consist of Project Contributors who have produced
contributions that are substantial in quality and quantity, and sustained over
at least one year. The overall role of the Core Team is to ensure, through
working with the BDFL and taking input from the Community, the long-term
well-being of the project, both technically and as a community.

During the everyday project activities, Core Team Members participate in all
discussions, code review and other project activities as peers with all other
Contributors and the Community. In these everyday activities, Core Team Members
do not have any special power or privilege through their membership on the Core
Team. However, it is expected that, because of the quality and quantity of
their contributions and their expert knowledge of the Project Software, the
Core Team will provide useful guidance, both technical and in terms of project
direction, to potentially less experienced contributors.

The Core Team and its Members play a special role in certain situations.
In particular, the Core Team may:

- Make decisions about the overall scope, vision and direction of the
  project.
- Make decisions about strategic collaborations with other organizations or
  individuals.
- Make decisions about specific technical issues, features, bugs and pull
  requests. They are the primary mechanism of guiding the code review process
  and merging pull requests.
- Make decisions about the Services that are run by The Project and manage
  those Services for the benefit of the Project and Community.
- Make decisions when regular community discussion doesn't produce consensus
  on an issue in a reasonable time frame.

### Core Team membership

To become eligible to be a Core Team Member, an individual must be a Project
Contributor who has produced contributions that are substantial in quality and
quantity, and sustained over at least one year.
Potential Core Team Members are
nominated by existing Core Team Members and voted upon by the existing Core
Team after asking if the potential Member is interested and willing to serve in
that capacity. The Core Team will be initially formed from the set of existing
Contributors who have been granted commit rights as of late 2015.

When considering potential Members, the Core Team will look at candidates with
a comprehensive view of their contributions. This will include, but is not
limited to, code, code review, infrastructure work, mailing list and chat
participation, community help/building, education and outreach, design work,
etc. We are deliberately not setting arbitrary quantitative metrics (like “100
commits in this repo”) to avoid encouraging behavior that plays to the metrics
rather than the project’s overall well-being. We want to encourage a diverse
array of backgrounds, viewpoints and talents in our team, which is why we
explicitly do not define code as the sole metric on which Core Team membership
will be evaluated.

If a Core Team member becomes inactive in the project for a period of one year,
they will be considered for removal from the Core Team. Before removal, the
inactive Member will be approached by the BDFL to see if they plan on returning
to active participation. If not, they will be removed immediately upon a Core
Team vote. If they plan on returning to active participation soon, they will be
given a grace period of one year. If they don't return to active participation
within that time period, they will be removed by vote of the Core Team without
a further grace period. All former Core Team members can be considered for
membership again at any time in the future, like any other Project Contributor.
Retired Core Team members will be listed on the project website, acknowledging
the period during which they were active in the Core Team.

The Core Team reserves the right to eject current Members, other than the BDFL,
if they are deemed to be actively harmful to the project’s well-being, and
attempts at communication and conflict resolution have failed.

### Conflict of interest

It is expected that the BDFL and Core Team Members will be employed at a wide
range of companies, universities and non-profit organizations. Because of this,
it is possible that Members will have conflicts of interest. Such conflicts of
interest include, but are not limited to:

- Financial interests, such as investments, employment or contracting work,
  outside of The Project that may influence their work on The Project.
- Access to proprietary information of their employer that could potentially
  leak into their work with the Project.

All members of the Core Team, BDFL included, shall disclose to the rest of the
Core Team any conflict of interest they may have. Members with a conflict of
interest in a particular issue may participate in Core Team discussions on that
issue, but must recuse themselves from voting on the issue. If the BDFL has
recused himself/herself for a particular decision, they will appoint a
substitute BDFL for that decision.

### Private communications of the Core Team

Unless specifically required, all Core Team discussions and activities will be
public and done in collaboration and discussion with the Project Contributors
and Community. The Core Team will have a private mailing list that will be used
sparingly and only when a specific matter requires privacy.
When private
communications and decisions are needed, the Core Team will do its best to
summarize those to the Community after eliding personal/private/sensitive
information that should not be posted to the public internet.

### Subcommittees

The Core Team can create subcommittees that provide leadership and guidance for
specific aspects of the project. Like the Core Team as a whole, subcommittees
should conduct their business in an open and public manner unless privacy is
specifically called for. Private subcommittee communications should happen on
the main private mailing list of the Core Team unless specifically called for.

Question: if the BDFL is not on a subcommittee, do they still have override
authority?

Suggestion: they do, but they should appoint a delegate who plays that role
most of the time, and explicit BDFL intervention is sought only if the
committee disagrees with that delegate’s decision and no resolution is possible
within the team. This is different from a BDFL delegate for a specific decision
(or a recusal situation), where the BDFL is literally giving up his/her
authority to someone else in full. It’s more like what Linus Torvalds uses with his
“lieutenants” model.

### NumFOCUS Subcommittee

The Core Team will maintain one narrowly focused subcommittee to manage its
interactions with NumFOCUS.

- The NumFOCUS Subcommittee is composed of at least 5 people who manage
  project funding that comes through NumFOCUS. It is expected that these funds
  will be spent in a manner that is consistent with the non-profit mission of
  NumFOCUS and the direction of the Project as determined by the full Core
  Team.
- This Subcommittee shall NOT make decisions about the direction, scope or
  technical direction of the Project.
- This Subcommittee will have at least 5 members. No more than 2 Subcommittee
  Members can report to one person (either directly or indirectly) through
  employment or contracting work (including the reportee, i.e. the reportee + 1
  is the max). This avoids effective majorities resting on one person.

## Institutional Partners and Funding

The BDFL and Core Team are the primary leadership for the project. No outside
institution, individual or legal entity has the ability to own, control, usurp
or influence the project other than by participating in the Project as
Contributors and Core Team. However, because institutions are the primary
funding mechanism for the project, it is important to formally acknowledge
institutional participation in the project. These are Institutional Partners.

An Institutional Contributor is any individual Project Contributor who
contributes to the project as part of their official duties at an Institutional
Partner. Likewise, an Institutional Core Team Member is any Core Team Member
who contributes to the project as part of their official duties at an
Institutional Partner.

With these definitions, an Institutional Partner is any recognized legal entity
in the United States or elsewhere that employs at least one Institutional
Contributor or Institutional Core Team Member. Institutional Partners can be
for-profit or non-profit entities.

Institutions become eligible to become Institutional Partners by employing
individuals who actively contribute to The Project as part of their official
duties.
To state this another way, the only way for an Institutional Partner to
influence the project is by actively contributing to the open development of
the project, on equal terms with any other member of the community of
Contributors and Core Team Members. Merely using pandas Software or Services in
an institutional context does not allow an entity to become an Institutional
Partner. Financial gifts do not enable an entity to become an Institutional
Partner. Once an institution becomes eligible for Institutional Partnership,
the Core Team must nominate and approve the Partnership.

If an existing Institutional Partner no longer has a contributing employee,
they will be given a one-year grace period for other employees to begin
contributing.

An Institutional Partner is free to pursue funding for their work on The
Project through any legal means. This could involve a non-profit organization
raising money from private foundations and donors or a for-profit company
building proprietary products and services that leverage Project Software and
Services. Funding acquired by Institutional Partners to work on The Project is
called Institutional Funding. However, no funding obtained by an Institutional
Partner can override The Project BDFL and Core Team. If a Partner has funding
to do pandas work and the Core Team decides not to pursue that work as a
project, the Partner is free to pursue it on their own. However, in this
situation, that part of the Partner’s work will not be under the pandas
umbrella and cannot use the Project trademarks in a way that suggests a formal
relationship.

To acknowledge institutional contributions, there are two levels of
Institutional Partners, with associated benefits:

**Tier 1** = an institution with at least one Institutional Core Team Member

- Acknowledged on the pandas website, in talks and T-shirts.
- Ability to acknowledge their own funding sources on the pandas website, in
  talks and T-shirts.
- Ability to influence the project through the participation of their Core Team
  Member.

**Tier 2** = an institution with at least one Institutional Contributor

## Breach

Non-compliance with the terms of the governance documents shall be reported to
the Core Team either through public or private channels as deemed appropriate.

## Changing the Governance

Changes to the governance are submitted via a GitHub pull request to The Project's
[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md).
The pull request is then refined in response to public comment and review, with
the goal being consensus in the community. After this open period, a Core Team
Member proposes to the Core Team that the changes be ratified and the pull
request merged (accepting the proposed changes) or proposes that the pull
request be closed without merging (rejecting the proposed changes). The Member
should state the final commit hash in the pull request being proposed for
acceptance or rejection and briefly summarize the pull request. A minimum of
80% of the Core Team must vote and at least 2/3 of the votes must be positive
to carry out the proposed action (fractions of a vote rounded up to the nearest
integer). Since the BDFL holds ultimate authority in The Project, the BDFL has
authority to act alone in accepting or rejecting changes or overriding Core
Team decisions.
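
The thresholds in the preceding paragraph are easy to get wrong at the
boundaries, so here is a minimal sketch of the arithmetic, assuming "2/3 of
the votes" refers to votes cast; the function and its inputs are illustrative,
not part of the governance document:

```python
def governance_vote_passes(core_team_size: int, votes_cast: int,
                           votes_in_favor: int) -> bool:
    # Integer ceiling division (-(-a // b) == ceil(a / b) for positive ints)
    # implements "rounded up to the nearest integer" without float surprises.
    quorum = -(-4 * core_team_size // 5)  # 80% of the Core Team, rounded up
    approval = -(-2 * votes_cast // 3)    # 2/3 of the votes cast, rounded up
    return votes_cast >= quorum and votes_in_favor >= approval

# A 12-member Core Team needs ceil(9.6) = 10 voters, and with 10 votes cast,
# ceil(20/3) = 7 of them must be positive.
assert governance_vote_passes(12, 10, 7)
assert not governance_vote_passes(12, 9, 9)  # quorum of 10 not met
```

Note the rounding works in favor of stricter thresholds: a fraction of a vote
always counts as a whole additional vote required.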

From 59f4b1f6c2e4e201ef449b27d5735e641d559d33 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:20 -0500
Subject: [PATCH 043/184] New translations governance.md (Persian)

---
 web/pandas/fa/about/governance.md | 195 ++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 web/pandas/fa/about/governance.md

diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md
new file mode 100644
index 000000000..b04a307c2
--- /dev/null
+++ b/web/pandas/fa/about/governance.md
@@ -0,0 +1,195 @@
# Project governance

The official version of this document, along with a list of individuals and
institutions in the roles defined in the governance section below, is
contained in the [Project governance]({{ base_url }}about/governance.html)
page of the pandas website.

## The Project

The pandas Project (The Project) is an open source software project affiliated
with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop
open source software for data ingest, data preparation, data analysis, and
data visualization for the Python programming language. The Software developed
by The Project is released under the BSD (or similar) open source license,
developed openly and hosted in public GitHub repositories under the pandas
organization on [GitHub](https://github.com/pandas-dev). Examples of Project
Software include the main pandas code repository and the pandas-stubs library.

Through its affiliation with NumFOCUS, The Project has the right to receive
tax-deductible donations in the United States of America.

The Project is developed by a team of distributed developers, called
Contributors. Contributors are individuals who have contributed code,
documentation, designs or other work to one or more Project repositories.
Anyone can be a Contributor. Contributors can be affiliated with any legal
entity or none. Contributors participate in the project by submitting,
reviewing and discussing GitHub Pull Requests and Issues and participating in
open and public Project discussions on GitHub, mailing lists, and elsewhere.
The foundation of Project participation is openness and transparency.

Here is a list of the current Contributors to the main pandas repository:

[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors)

There are also many other Contributors listed in the logs of other
repositories of the pandas project.

The Project Community consists of all Contributors and Users of the Project.
Contributors work on behalf of and are responsible to the larger Project
Community and we strive to keep the barrier between Contributors and Users as
low as possible.

The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation
([https://numfocus.org](https://numfocus.org)), which serves as its fiscal
sponsor, may hold project trademarks and other intellectual property, helps
manage project donations and acts as a parent legal entity. NumFOCUS is the
only legal entity that has a formal relationship with the project (see the
Institutional Partners section below for details).

## Governance

This section describes the governance and leadership model of The Project.

The foundations of Project governance are:

- Openness & Transparency
- Active Contribution
- Institutional Neutrality

Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and a
subset of Contributors, called the Core Team, whose active and consistent
contributions have been recognized by their receiving “commit rights” to the
Project GitHub repositories.
In general, all Project decisions are made through
consensus among the Core Team with input from the Community. The BDFL can, but
rarely chooses to, override the Core Team and make a final decision on a
matter.

While this approach has served us well, as the Project grows and faces more
legal and financial decisions and interacts with other institutions, we see a
need for a more formal governance model. Moving forward, The Project leadership
will consist of a BDFL and Core Team. We view this governance model as the
formalization of what we are already doing, rather than a change in direction.

### BDFL

The Project will have a BDFL (Benevolent Dictator for Life), who is currently
Wes McKinney. As Dictator, the BDFL has the authority to make all final
decisions for The Project. As Benevolent, the BDFL in practice chooses to
defer that authority to the consensus of the community discussion channels and
the Core Team. It is expected, and in the past has been the case, that the BDFL
will only rarely assert his/her final authority. Because it is rarely used, we
refer to the BDFL’s final authority as a “special” or “overriding” vote. When it
does occur, the BDFL override typically happens in situations where there is a
deadlock in the Core Team or if the Core Team asks the BDFL to make a decision
on a specific matter. To ensure the benevolence of the BDFL, The Project
encourages others to fork the project if they disagree with the overall
direction the BDFL is taking. The BDFL is chair of the Core Team (see below)
and may delegate his/her authority on a particular decision or set of decisions
to any other Core Team Member at his/her discretion.

The BDFL can appoint his/her successor, but it is expected that the Core Team
would be consulted on this decision. If the BDFL is unable to appoint a
successor (e.g. due to death or illness), the Core Team will choose a successor
by a vote in which at least 2/3 of the Core Team members vote in favor of the
chosen successor. At least 80% of the Core Team must participate in the
vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core
Team members shall propose the BDFL candidates to the Main NumFOCUS board,
which will then make the final decision.

### Core Team

The Project's Core Team will consist of Project Contributors who have produced
contributions that are substantial in quality and quantity, and sustained over
at least one year. The overall role of the Core Team is to ensure, through
working with the BDFL and taking input from the Community, the long-term
well-being of the project, both technically and as a community.

During the everyday project activities, Core Team Members participate in all
discussions, code review and other project activities as peers with all other
Contributors and the Community. In these everyday activities, Core Team Members
do not have any special power or privilege through their membership on the Core
Team. However, it is expected that, because of the quality and quantity of
their contributions and their expert knowledge of the Project Software, the
Core Team will provide useful guidance, both technical and in terms of project
direction, to potentially less experienced contributors.

The Core Team and its Members play a special role in certain situations.
In particular, the Core Team may:

- Make decisions about the overall scope, vision and direction of the
  project.
- Make decisions about strategic collaborations with other organizations or
  individuals.
- Make decisions about specific technical issues, features, bugs and pull
  requests. They are the primary mechanism of guiding the code review process
  and merging pull requests.
- Make decisions about the Services that are run by The Project and manage
  those Services for the benefit of the Project and Community.
- Make decisions when regular community discussion doesn't produce consensus
  on an issue in a reasonable time frame.

### Core Team membership

To become eligible to be a Core Team Member, an individual must be a Project
Contributor who has produced contributions that are substantial in quality and
quantity, and sustained over at least one year. Potential Core Team Members are
nominated by existing Core Team Members and voted upon by the existing Core
Team after asking if the potential Member is interested and willing to serve in
that capacity. The Core Team will be initially formed from the set of existing
Contributors who have been granted commit rights as of late 2015.

When considering potential Members, the Core Team will look at candidates with
a comprehensive view of their contributions. This will include, but is not
limited to, code, code review, infrastructure work, mailing list and chat
participation, community help/building, education and outreach, design work,
etc. We are deliberately not setting arbitrary quantitative metrics (like “100
commits in this repo”) to avoid encouraging behavior that plays to the metrics
rather than the project’s overall well-being. We want to encourage a diverse
array of backgrounds, viewpoints and talents in our team, which is why we
explicitly do not define code as the sole metric on which Core Team membership
will be evaluated.

If a Core Team member becomes inactive in the project for a period of one year,
they will be considered for removal from the Core Team. Before removal, the
inactive Member will be approached by the BDFL to see if they plan on returning
to active participation. If not, they will be removed immediately upon a Core
Team vote. If they plan on returning to active participation soon, they will be
given a grace period of one year. If they don't return to active participation
within that time period, they will be removed by vote of the Core Team without
a further grace period. All former Core Team members can be considered for
membership again at any time in the future, like any other Project Contributor.
Retired Core Team members will be listed on the project website, acknowledging
the period during which they were active in the Core Team.

The Core Team reserves the right to eject current Members, other than the BDFL,
if they are deemed to be actively harmful to the project’s well-being, and
attempts at communication and conflict resolution have failed.

### Conflict of interest

It is expected that the BDFL and Core Team Members will be employed at a wide
range of companies, universities and non-profit organizations. Because of this,
it is possible that Members will have conflicts of interest. Such conflicts of
interest include, but are not limited to:

- Financial interests, such as investments, employment or contracting work,
  outside of The Project that may influence their work on The Project.
- Access to proprietary information of their employer that could potentially
  leak into their work with the Project.

All members of the Core Team, BDFL included, shall disclose to the rest of the
Core Team any conflict of interest they may have. Members with a conflict of
interest in a particular issue may participate in Core Team discussions on that
issue, but must recuse themselves from voting on the issue. If the BDFL has
recused himself/herself for a particular decision, they will appoint a
substitute BDFL for that decision.

### Private communications of the Core Team

Unless specifically required, all Core Team discussions and activities will be
public and done in collaboration and discussion with the Project Contributors
and Community. The Core Team will have a private mailing list that will be used
sparingly and only when a specific matter requires privacy.
When private
communications and decisions are needed, the Core Team will do its best to
summarize those to the Community after eliding personal/private/sensitive
information that should not be posted to the public internet.

### Subcommittees

The Core Team can create subcommittees that provide leadership and guidance for
specific aspects of the project. Like the Core Team as a whole, subcommittees
should conduct their business in an open and public manner unless privacy is
specifically called for. Private subcommittee communications should happen on
the main private mailing list of the Core Team unless specifically called for.

Question: if the BDFL is not on a subcommittee, do they still have override
authority?

Suggestion: they do, but they should appoint a delegate who plays that role
most of the time, and explicit BDFL intervention is sought only if the
committee disagrees with that delegate’s decision and no resolution is possible
within the team. This is different from a BDFL delegate for a specific decision
(or a recusal situation), where the BDFL is literally giving up his/her
authority to someone else in full. It’s more like what Linus Torvalds uses with his
“lieutenants” model.

### NumFOCUS Subcommittee

The Core Team will maintain one narrowly focused subcommittee to manage its
interactions with NumFOCUS.

- The NumFOCUS Subcommittee is composed of at least 5 people who manage
  project funding that comes through NumFOCUS. It is expected that these funds
  will be spent in a manner that is consistent with the non-profit mission of
  NumFOCUS and the direction of the Project as determined by the full Core
  Team.
- This Subcommittee shall NOT make decisions about the direction, scope or
  technical direction of the Project.
- This Subcommittee will have at least 5 members. No more than 2 Subcommittee
  Members can report to one person (either directly or indirectly) through
  employment or contracting work (including the reportee, i.e. the reportee + 1
  is the max). This avoids effective majorities resting on one person.

## Institutional Partners and Funding

The BDFL and Core Team are the primary leadership for the project. No outside
institution, individual or legal entity has the ability to own, control, usurp
or influence the project other than by participating in the Project as
Contributors and Core Team. However, because institutions are the primary
funding mechanism for the project, it is important to formally acknowledge
institutional participation in the project. These are Institutional Partners.

An Institutional Contributor is any individual Project Contributor who
contributes to the project as part of their official duties at an Institutional
Partner. Likewise, an Institutional Core Team Member is any Core Team Member
who contributes to the project as part of their official duties at an
Institutional Partner.

With these definitions, an Institutional Partner is any recognized legal entity
in the United States or elsewhere that employs at least one Institutional
Contributor or Institutional Core Team Member. Institutional Partners can be
for-profit or non-profit entities.

Institutions become eligible to become Institutional Partners by employing
individuals who actively contribute to The Project as part of their official
duties.
To state this another way, the only way for an Institutional Partner to
influence the project is by actively contributing to the open development of
the project, on equal terms with any other member of the community of
Contributors and Core Team Members. Merely using pandas Software or Services in
an institutional context does not allow an entity to become an Institutional
Partner. Financial gifts do not enable an entity to become an Institutional
Partner. Once an institution becomes eligible for Institutional Partnership,
the Core Team must nominate and approve the Partnership.

If an existing Institutional Partner no longer has a contributing employee,
they will be given a one-year grace period for other employees to begin
contributing.

An Institutional Partner is free to pursue funding for their work on The
Project through any legal means. This could involve a non-profit organization
raising money from private foundations and donors or a for-profit company
building proprietary products and services that leverage Project Software and
Services. Funding acquired by Institutional Partners to work on The Project is
called Institutional Funding. However, no funding obtained by an Institutional
Partner can override The Project BDFL and Core Team. If a Partner has funding
to do pandas work and the Core Team decides not to pursue that work as a
project, the Partner is free to pursue it on their own. However, in this
situation, that part of the Partner’s work will not be under the pandas
umbrella and cannot use the Project trademarks in a way that suggests a formal
relationship.

To acknowledge institutional contributions, there are two levels of
Institutional Partners, with associated benefits:

**Tier 1** = an institution with at least one Institutional Core Team Member

- Acknowledged on the pandas website, in talks and T-shirts.
- Ability to acknowledge their own funding sources on the pandas website, in
  talks and T-shirts.
- Ability to influence the project through the participation of their Core Team
  Member.

**Tier 2** = an institution with at least one Institutional Contributor

## Breach

Non-compliance with the terms of the governance documents shall be reported to
the Core Team either through public or private channels as deemed appropriate.

## Changing the Governance

Changes to the governance are submitted via a GitHub pull request to The Project's
[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md).
The pull request is then refined in response to public comment and review, with
the goal being consensus in the community. After this open period, a Core Team
Member proposes to the Core Team that the changes be ratified and the pull
request merged (accepting the proposed changes) or proposes that the pull
request be closed without merging (rejecting the proposed changes). The Member
should state the final commit hash in the pull request being proposed for
acceptance or rejection and briefly summarize the pull request. A minimum of
80% of the Core Team must vote and at least 2/3 of the votes must be positive
to carry out the proposed action (fractions of a vote rounded up to the nearest
integer). Since the BDFL holds ultimate authority in The Project, the BDFL has
authority to act alone in accepting or rejecting changes or overriding Core
Team decisions.

From 3748e3bc3116f27ecdd3d131cf39e2cb0236e79c Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:21 -0500
Subject: [PATCH 044/184] New translations governance.md (Tamil)

---
 web/pandas/ta/about/governance.md | 314 ++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 web/pandas/ta/about/governance.md

diff --git a/web/pandas/ta/about/governance.md b/web/pandas/ta/about/governance.md
new file mode 100644
index 000000000..908b3549e
--- /dev/null
+++ b/web/pandas/ta/about/governance.md
@@ -0,0 +1,314 @@
# Project governance

The official version of this document, along with a list of individuals and
institutions in the roles defined in the governance section below, is
contained in the [Project governance]({{ base_url }}about/governance.html)
page of the pandas website.

## The Project

The pandas Project (The Project) is an open source software project affiliated
with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open
source software for data ingest, data preparation, data analysis, and data
visualization for the Python programming language. The Software developed by
The Project is released under the BSD (or similar) open source license,
developed openly and hosted in public GitHub repositories under the pandas
GitHub organization. Examples of Project Software
include the main pandas code repository and the pandas-stubs library.

Through its affiliation with NumFOCUS, The Project has the right to receive
tax-deductible donations in the United States of America.

The Project is developed by a team of distributed developers, called
Contributors. Contributors are individuals who have contributed code,
documentation, designs or other work to one or more Project repositories.
Anyone can be a Contributor. Contributors can be affiliated with any legal
entity or none. Contributors participate in the project by submitting,
reviewing and discussing GitHub Pull Requests and Issues and participating in
open and public Project discussions on GitHub, mailing lists, and
elsewhere. The foundation of Project participation is openness and
transparency.

Here is a list of the current Contributors to the main pandas repository:

[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors)

There are also many other Contributors listed in the logs of other repositories of
the pandas project.

The Project Community consists of all Contributors and Users of the Project.
Contributors work on behalf of and are responsible to the larger Project
Community and we strive to keep the barrier between Contributors and Users as
low as possible.

The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation
([https://numfocus.org](https://numfocus.org)), which serves as its fiscal
sponsor, may hold project trademarks and other intellectual property, helps
manage project donations and acts as a parent legal entity. NumFOCUS is the
only legal entity that has a formal relationship with the project (see
Institutional Partners section below).

## Governance

This section describes the governance and leadership model of The Project.

The foundations of Project governance are:

- Openness & Transparency
- Active Contribution
- Institutional Neutrality

Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and a
subset of Contributors, called the Core Team, whose active and consistent
contributions have been recognized by their receiving “commit rights” to the
Project GitHub repositories. In general, all Project decisions are made through
consensus among the Core Team with input from the Community. The BDFL can, but
rarely chooses to, override the Core Team and make a final decision on a
matter.

While this approach has served us well, as the Project grows and faces more
legal and financial decisions and interacts with other institutions, we see a
need for a more formal governance model. Moving forward, The Project leadership
will consist of a BDFL and Core Team. We view this governance model as the
formalization of what we are already doing, rather than a change in direction.

### BDFL

The Project will have a BDFL (Benevolent Dictator for Life), who is currently
Wes McKinney. As Dictator, the BDFL has the authority to make all final
decisions for The Project. As Benevolent, the BDFL in practice chooses to
defer that authority to the consensus of the community discussion channels and
the Core Team. It is expected, and in the past has been the case, that the BDFL
will only rarely assert his/her final authority. Because it is rarely used, we
refer to the BDFL’s final authority as a “special” or “overriding” vote. When it
does occur, the BDFL override typically happens in situations where there is a
deadlock in the Core Team or if the Core Team asks the BDFL to make a decision
on a specific matter. To ensure the benevolence of the BDFL, The Project
encourages others to fork the project if they disagree with the overall
direction the BDFL is taking. The BDFL is chair of the Core Team (see below)
and may delegate his/her authority on a particular decision or set of decisions
to any other Core Team Member at his/her discretion.

The BDFL can appoint his/her successor, but it is expected that the Core Team
would be consulted on this decision. If the BDFL is unable to appoint a
successor (e.g. due to death or illness), the Core Team will choose a successor
by a vote in which at least 2/3 of the Core Team members vote in favor of the
chosen successor. At least 80% of the Core Team must participate in the
vote. If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core
Team members shall propose the BDFL candidates to the Main NumFOCUS board,
which will then make the final decision.

### Core Team

The Project's Core Team will consist of Project Contributors who have produced
contributions that are substantial in quality and quantity, and sustained over
at least one year. The overall role of the Core Team is to ensure, through
working with the BDFL and taking input from the Community, the long-term
well-being of the project, both technically and as a community.

During the everyday project activities, Core Team Members participate in all
discussions, code review and other project activities as peers with all other
Contributors and the Community. In these everyday activities, Core Team Members
do not have any special power or privilege through their membership on the Core
Team.
However, it is expected that, because of the quality and quantity of
their contributions and their expert knowledge of the Project Software, the
Core Team will provide useful guidance, both technical and in terms of project
direction, to potentially less experienced contributors.

The Core Team and its Members play a special role in certain situations.
In particular, the Core Team may:

- Make decisions about the overall scope, vision and direction of the
  project.
- Make decisions about strategic collaborations with other organizations or
  individuals.
- Make decisions about specific technical issues, features, bugs and pull
  requests. They are the primary mechanism of guiding the code review process
  and merging pull requests.
- Make decisions about the Services that are run by The Project and manage
  those Services for the benefit of the Project and Community.
- Make decisions when regular community discussion doesn't produce consensus
  on an issue in a reasonable time frame.

### Core Team membership

To become eligible to be a Core Team Member, an individual must be a Project
Contributor who has produced contributions that are substantial in quality and
quantity, and sustained over at least one year. Potential Core Team Members are
nominated by existing Core Team Members and voted upon by the existing Core
Team after asking if the potential Member is interested and willing to serve in
that capacity. The Core Team will be initially formed from the set of existing
Contributors who have been granted commit rights as of late 2015.

When considering potential Members, the Core Team will look at candidates with
a comprehensive view of their contributions. This will include, but is not
limited to, code, code review, infrastructure work, mailing list and chat
participation, community help/building, education and outreach, design work,
etc. We are deliberately not setting arbitrary quantitative metrics (like “100
commits in this repo”) to avoid encouraging behavior that plays to the metrics
rather than the project’s overall well-being. We want to encourage a diverse
array of backgrounds, viewpoints and talents in our team, which is why we
explicitly do not define code as the sole metric on which Core Team membership
will be evaluated.

If a Core Team member becomes inactive in the project for a period of one year,
they will be considered for removal from the Core Team. Before removal, the
inactive Member will be approached by the BDFL to see if they plan on returning
to active participation. If not, they will be removed immediately upon a Core
Team vote. If they plan on returning to active participation soon, they will be
given a grace period of one year. If they don't return to active participation
within that time period, they will be removed by vote of the Core Team without
a further grace period. All former Core Team members can be considered for
membership again at any time in the future, like any other Project Contributor.
Retired Core Team members will be listed on the project website, acknowledging
the period during which they were active in the Core Team.

The Core Team reserves the right to eject current Members, other than the BDFL,
if they are deemed to be actively harmful to the project’s well-being, and
attempts at communication and conflict resolution have failed.

### Conflict of interest

It is expected that the BDFL and Core Team Members will be employed at a wide
range of companies, universities and non-profit organizations.
Because of this,
it is possible that Members will have conflicts of interest. Such conflicts of
interest include, but are not limited to:

- Financial interests, such as investments, employment or contracting work,
  outside of The Project that may influence their work on The Project.
- Access to proprietary information of their employer that could potentially
  leak into their work with the Project.

All members of the Core Team, BDFL included, shall disclose to the rest of the
Core Team any conflict of interest they may have. Members with a conflict of
interest in a particular issue may participate in Core Team discussions on that
issue, but must recuse themselves from voting on the issue. If the BDFL has
recused himself/herself for a particular decision, they will appoint a
substitute BDFL for that decision.

### Private communications of the Core Team

Unless specifically required, all Core Team discussions and activities will be
public and done in collaboration and discussion with the Project Contributors
and Community. The Core Team will have a private mailing list that will be used
sparingly and only when a specific matter requires privacy. When private
communications and decisions are needed, the Core Team will do its best to
summarize those to the Community after eliding personal/private/sensitive
information that should not be posted to the public internet.

### Subcommittees

The Core Team can create subcommittees that provide leadership and guidance for
specific aspects of the project. Like the Core Team as a whole, subcommittees
should conduct their business in an open and public manner unless privacy is
specifically called for. Private subcommittee communications should happen on
the main private mailing list of the Core Team unless specifically called for.

Question: if the BDFL is not on a subcommittee, do they still have override
authority?

Suggestion: they do, but they should appoint a delegate who plays that role
most of the time, and explicit BDFL intervention is sought only if the
committee disagrees with that delegate’s decision and no resolution is possible
within the team. This is different from a BDFL delegate for a specific decision
(or a recusal situation), where the BDFL is literally giving up his/her
authority to someone else in full. It’s more like the “lieutenants” model that
Linus Torvalds uses.

### NumFOCUS Subcommittee

The Core Team will maintain one narrowly focused subcommittee to manage its
interactions with NumFOCUS.

- The NumFOCUS Subcommittee is composed of at least 5 persons who manage
  project funding that comes through NumFOCUS. It is expected that these funds
  will be spent in a manner that is consistent with the non-profit mission of
  NumFOCUS and the direction of the Project as determined by the full Core
  Team.
- This Subcommittee shall NOT make decisions about the direction, scope or
  technical direction of the Project.
- This Subcommittee will have at least 5 members. No more than 2 Subcommittee
  Members can report to one person (either directly or indirectly) through
  employment or contracting work (including the reportee, i.e. the reportee + 1
  is the max). This avoids effective majorities resting on one person.

## Institutional Partners and Funding

The BDFL and Core Team are the primary leadership for the project.
No outside
institution, individual or legal entity has the ability to own, control, usurp
or influence the project other than by participating in the Project as
Contributors and Core Team. However, because institutions are the primary
funding mechanism for the project, it is important to formally acknowledge
institutional participation in the project. These are Institutional Partners.

An Institutional Contributor is any individual Project Contributor who
contributes to the project as part of their official duties at an Institutional
Partner. Likewise, an Institutional Core Team Member is any Core Team Member
who contributes to the project as part of their official duties at an
Institutional Partner.

With these definitions, an Institutional Partner is any recognized legal entity
in the United States or elsewhere that employs at least one Institutional
Contributor or Institutional Core Team Member. Institutional Partners can be
for-profit or non-profit entities.

Institutions become eligible to become an Institutional Partner by employing
individuals who actively contribute to The Project as part of their official
duties. To state this another way, the only way for an Institutional Partner to
influence the project is by actively contributing to the open development of
the project, on equal terms with any other member of the community of
Contributors and Core Team Members. Merely using pandas Software or Services in
an institutional context does not allow an entity to become an Institutional
Partner. Financial gifts do not enable an entity to become an Institutional
Partner. Once an institution becomes eligible for Institutional Partnership,
the Core Team must nominate and approve the Partnership.

If an existing Institutional Partner no longer has a contributing employee,
they will be given a one-year grace period for other employees to begin
contributing.

An Institutional Partner is free to pursue funding for their work on The
Project through any legal means. This could involve a non-profit organization
raising money from private foundations and donors or a for-profit company
building proprietary products and services that leverage Project Software and
Services. Funding acquired by Institutional Partners to work on The Project is
called Institutional Funding. However, no funding obtained by an Institutional
Partner can override The Project BDFL and Core Team. If a Partner has funding
to do pandas work and the Core Team decides to not pursue that work as a
project, the Partner is free to pursue it on their own. However, in this
situation, that part of the Partner’s work will not be under the pandas
umbrella and cannot use the Project trademarks in a way that suggests a formal
relationship.

To acknowledge institutional contributions, there are two levels of
Institutional Partners, with associated benefits:

**Tier 1** = an institution with at least one Institutional Core Team Member

- Acknowledged on the pandas website, in talks and T-shirts.
- Ability to acknowledge their own funding sources on the pandas website, in
  talks and T-shirts.
- Ability to influence the project through the participation of their Core Team
  Member.

**Tier 2** = an institution with at least one Institutional Contributor

## Breach

Non-compliance with the terms of the governance documents shall be reported to
the Core Team either through public or private channels as deemed appropriate.

## Changing the Governance

Changes to the governance are submitted via a GitHub pull request to The Project's
[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md).
The pull request is then refined in response to public comment and review, with
the goal being consensus in the community. After this open period, a Core Team
Member proposes to the Core Team that the changes be ratified and the pull
request merged (accepting the proposed changes) or proposes that the pull
request be closed without merging (rejecting the proposed changes). The Member
should state the final commit hash in the pull request being proposed for
acceptance or rejection and briefly summarize the pull request. A minimum of
80% of the Core Team must vote and at least 2/3 of the votes must be positive
to carry out the proposed action (fractions of a vote rounded up to the nearest
integer). Since the BDFL holds ultimate authority in The Project, the BDFL has
authority to act alone in accepting or rejecting changes or overriding Core
Team decisions.
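The quorum and approval thresholds in the paragraph above involve ceiling
arithmetic that is easy to get wrong by hand. The following sketch is purely
illustrative (the function name `proposal_carries` and the 15-member team are
invented for this example; nothing like it is part of the governance process or
the pandas codebase); it checks a proposed action against the two rules using
exact integer math:

```python
def proposal_carries(team_size: int, votes_cast: int, votes_in_favor: int) -> bool:
    # Quorum: at least 80% of the Core Team must vote. ceil(4 * N / 5) is
    # computed with integer ceiling division so no float rounding creeps in.
    quorum = -(-(4 * team_size) // 5)
    # Approval: at least 2/3 of the votes cast must be positive, with
    # fractions of a vote rounded up to the nearest integer.
    needed = -(-(2 * votes_cast) // 3)
    return votes_cast >= quorum and votes_in_favor >= needed

# With a hypothetical 15-member Core Team: at least 12 members must vote,
# and of 12 votes cast at least 8 must be in favor.
assert proposal_carries(team_size=15, votes_cast=12, votes_in_favor=8)
assert not proposal_carries(team_size=15, votes_cast=11, votes_in_favor=11)
```

Integer ceiling division (`-(-a // b)`) is used instead of `math.ceil(a / b)`
to avoid floating-point edge cases when rounding fractions of a vote upward.
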
From a22d9a016c7e81a0607955df08faa8bda0e68e10 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:22 -0500
Subject: [PATCH 045/184] New translations governance.md (Hindi)

---
 web/pandas/hi/about/governance.md | 317 ++++++++++++++++++++++++++++++
 1 file changed, 317 insertions(+)
 create mode 100644 web/pandas/hi/about/governance.md

diff --git a/web/pandas/hi/about/governance.md b/web/pandas/hi/about/governance.md
new file mode 100644
index 000000000..b37925e89
--- /dev/null
+++ b/web/pandas/hi/about/governance.md
@@ -0,0 +1,317 @@
# Project governance

The official version of this document, along with a list of
individuals and institutions in the roles defined in the governance
section below, is contained in the
[Project governance]({{ base_url }}about/governance.html)
page of the pandas website.

## The Project

The pandas Project (The Project) is an open source software project affiliated
with the 501(c)3 NumFOCUS Foundation. The goal of The Project is to develop open
source software for data ingest, data preparation, data analysis, and data
visualization for the Python programming language. The Software developed by
The Project is released under the BSD (or similar) open source license,
developed openly and hosted in public GitHub repositories under the pandas
GitHub organization. Examples of Project Software
include the main pandas code repository and the pandas-stubs library.

Through its affiliation with NumFOCUS, The Project has the right to receive
tax-deductible donations in the United States of America.

The Project is developed by a team of distributed developers, called
Contributors. Contributors are individuals who have contributed code,
documentation, designs or other work to one or more Project repositories.
Anyone can be a Contributor. Contributors can be affiliated with any legal
entity or none. Contributors participate in the project by submitting,
reviewing and discussing GitHub Pull Requests and Issues and participating in
open and public Project discussions on GitHub, mailing lists, and
elsewhere. The foundation of Project participation is openness and
transparency.

Here is a list of the current Contributors to the main pandas repository:

[https://github.com/pandas-dev/pandas/graphs/contributors](https://github.com/pandas-dev/pandas/graphs/contributors)

There are also many other Contributors listed in the logs of other repositories of
the pandas project.

The Project Community consists of all Contributors and Users of the Project.
Contributors work on behalf of and are responsible to the larger Project
Community and we strive to keep the barrier between Contributors and Users as
low as possible.

The Project is formally affiliated with the 501(c)3 NumFOCUS Foundation
([https://numfocus.org](https://numfocus.org)), which serves as its fiscal
sponsor, may hold project trademarks and other intellectual property, helps
manage project donations and acts as a parent legal entity. NumFOCUS is the
only legal entity that has a formal relationship with the project (see
Institutional Partners section below).

## Governance

This section describes the governance and leadership model of The Project.

The foundations of Project governance are:

- Openness & Transparency
- Active Contribution
- Institutional Neutrality

Traditionally, Project leadership was provided by a BDFL (Wes McKinney) and a
subset of Contributors, called the Core Team, whose active and consistent
contributions have been recognized by their receiving “commit rights” to the
Project GitHub repositories. In general, all Project decisions are made through
consensus among the Core Team with input from the Community. The BDFL can, but
rarely chooses to, override the Core Team and make a final decision on a
matter.

While this approach has served us well, as the Project grows and faces more
legal and financial decisions and interacts with other institutions, we see a
need for a more formal governance model. Moving forward, The Project leadership
will consist of a BDFL and Core Team. We view this governance model as the
formalization of what we are already doing, rather than a change in direction.

### BDFL

The Project will have a BDFL (Benevolent Dictator for Life), who is currently
Wes McKinney. As Dictator, the BDFL has the authority to make all final
decisions for The Project. As Benevolent, the BDFL, in practice, chooses to
defer that authority to the consensus of the community discussion channels and
the Core Team. It is expected, and in the past has been the case, that the BDFL
will only rarely assert his/her final authority. Because it is rarely used, we
refer to the BDFL’s final authority as a “special” or “overriding” vote. When it
does occur, the BDFL override typically happens in situations where there is a
deadlock in the Core Team or if the Core Team asks the BDFL to make a decision
on a specific matter. To ensure the benevolence of the BDFL, The Project
encourages others to fork the project if they disagree with the overall
direction the BDFL is taking. The BDFL is chair of the Core Team (see below)
and may delegate his/her authority on a particular decision or set of decisions
to any other Core Team Member at his/her discretion.

The BDFL can appoint his/her successor, but it is expected that the Core Team
would be consulted on this decision. If the BDFL is unable to appoint a
successor (e.g. due to death or illness), the Core Team will choose a successor
by voting with at least 2/3 of the Core Team members voting in favor of the
chosen successor. At least 80% of the Core Team must participate in the
vote.
If no BDFL candidate receives 2/3 of the votes of the Core Team, the Core
Team members shall propose the BDFL candidates to the Main NumFOCUS board, who
will then make the final decision.

### Core Team

The Project's Core Team will consist of Project Contributors who have produced
contributions that are substantial in quality and quantity, and sustained over
at least one year. The overall role of the Core Team is to ensure, through
working with the BDFL and taking input from the Community, the long-term
well-being of the project, both technically and as a community.

During the everyday project activities, Core Team Members participate in all
discussions, code review and other project activities as peers with all other
Contributors and the Community. In these everyday activities, Core Team Members
do not have any special power or privilege through their membership on the Core
Team. However, it is expected that, because of the quality and quantity of their
contributions and their expert knowledge of the Project Software, the Core
Team will provide useful guidance, both technical and in terms of project
direction, to potentially less experienced contributors.

The Core Team and its Members play a special role in certain situations.
In particular, the Core Team may:

- Make decisions about the overall scope, vision and direction of the
  project.
- Make decisions about strategic collaborations with other organizations or
  individuals.
- Make decisions about specific technical issues, features, bugs and pull
  requests. They are the primary mechanism of guiding the code review process
  and merging pull requests.
- Make decisions about the Services that are run by The Project and manage
  those Services for the benefit of the Project and Community.
- Make decisions when regular community discussion doesn't produce consensus
  on an issue in a reasonable time frame.

### Core Team membership

To become eligible to be a Core Team Member, an individual must be a Project
Contributor who has produced contributions that are substantial in quality and
quantity, and sustained over at least one year. Potential Core Team Members are
nominated by existing Core Team Members and voted upon by the existing Core Team
after asking if the potential Member is interested and willing to serve in that
capacity. The Core Team will be initially formed from the set of existing
Contributors who have been granted commit rights as of late 2015.

When considering potential Members, the Core Team will look at candidates with
a comprehensive view of their contributions. This will include but is not
limited to code, code review, infrastructure work, mailing list and chat
participation, community help/building, education and outreach, design work,
etc. We are deliberately not setting arbitrary quantitative metrics (like “100
commits in this repo”) to avoid encouraging behavior that plays to the metrics
rather than the project’s overall well-being. We want to encourage a diverse
array of backgrounds, viewpoints and talents in our team, which is why we
explicitly do not define code as the sole metric on which Core Team membership
will be evaluated.

If a Core Team member becomes inactive in the project for a period of one year,
they will be considered for removal from the Core Team. Before removal, the
inactive Member will be approached by the BDFL to see if they plan on returning
to active participation. If not, they will be removed immediately upon a Core
Team vote.
If they plan on returning to active participation soon, they will be
given a grace period of one year. If they don't return to active participation
within that time period, they will be removed by vote of the Core Team without
a further grace period. All former Core Team members can be considered for
membership again at any time in the future, like any other Project Contributor.
Retired Core Team members will be listed on the project website, acknowledging
the period during which they were active in the Core Team.

The Core Team reserves the right to eject current Members, other than the BDFL,
if they are deemed to be actively harmful to the project’s well-being, and
attempts at communication and conflict resolution have failed.

### Conflict of interest

It is expected that the BDFL and Core Team Members will be employed at a wide
range of companies, universities and non-profit organizations. Because of this,
it is possible that Members will have conflicts of interest. Such conflicts of
interest include, but are not limited to:

- Financial interests, such as investments, employment or contracting work,
  outside of The Project that may influence their work on The Project.
- Access to proprietary information of their employer that could potentially
  leak into their work with the Project.

All members of the Core Team, BDFL included, shall disclose to the rest of the
Core Team any conflict of interest they may have. Members with a conflict of
interest in a particular issue may participate in Core Team discussions on that
issue, but must recuse themselves from voting on the issue. If the BDFL has
recused himself/herself for a particular decision, they will appoint a
substitute BDFL for that decision.

### Private communications of the Core Team

Unless specifically required, all Core Team discussions and activities will be
public and done in collaboration and discussion with the Project Contributors
and Community. The Core Team will have a private mailing list that will be used
sparingly and only when a specific matter requires privacy. When private
communications and decisions are needed, the Core Team will do its best to
summarize those to the Community after eliding personal/private/sensitive
information that should not be posted to the public internet.

### Subcommittees

The Core Team can create subcommittees that provide leadership and guidance for
specific aspects of the project. Like the Core Team as a whole, subcommittees
should conduct their business in an open and public manner unless privacy is
specifically called for. Private subcommittee communications should happen on
the main private mailing list of the Core Team unless specifically called for.

Question: if the BDFL is not on a subcommittee, do they still have override
authority?

Suggestion: they do, but they should appoint a delegate who plays that role
most of the time, and explicit BDFL intervention is sought only if the
committee disagrees with that delegate’s decision and no resolution is possible
within the team. This is different from a BDFL delegate for a specific decision
(or a recusal situation), where the BDFL is literally giving up his/her
authority to someone else in full. It’s more like the “lieutenants” model that
Linus Torvalds uses.

### NumFOCUS Subcommittee

The Core Team will maintain one narrowly focused subcommittee to manage its
interactions with NumFOCUS.

- The NumFOCUS Subcommittee is composed of at least 5 persons who manage
  project funding that comes through NumFOCUS. It is expected that these funds
  will be spent in a manner that is consistent with the non-profit mission of
  NumFOCUS and the direction of the Project as determined by the full Core
  Team.
- This Subcommittee shall NOT make decisions about the direction, scope or
  technical direction of the Project.
- This Subcommittee will have at least 5 members. No more than 2 Subcommittee
  Members can report to one person (either directly or indirectly) through
  employment or contracting work (including the reportee, i.e. the reportee + 1
  is the max). This avoids effective majorities resting on one person.

## Institutional Partners and Funding

The BDFL and Core Team are the primary leadership for the project. No outside
institution, individual or legal entity has the ability to own, control, usurp
or influence the project other than by participating in the Project as
Contributors and Core Team. However, because institutions are the primary
funding mechanism for the project, it is important to formally acknowledge
institutional participation in the project. These are Institutional Partners.

An Institutional Contributor is any individual Project Contributor who
contributes to the project as part of their official duties at an Institutional
Partner. Likewise, an Institutional Core Team Member is any Core Team Member
who contributes to the project as part of their official duties at an
Institutional Partner.

With these definitions, an Institutional Partner is any recognized legal entity
in the United States or elsewhere that employs at least one Institutional
Contributor or Institutional Core Team Member. Institutional Partners can be
for-profit or non-profit entities.

Institutions become eligible to become an Institutional Partner by employing
individuals who actively contribute to The Project as part of their official
duties. To state this another way, the only way for an Institutional Partner to
influence the project is by actively contributing to the open development of
the project, on equal terms with any other member of the community of
Contributors and Core Team Members. Merely using pandas Software or Services in
an institutional context does not allow an entity to become an Institutional
Partner. Financial gifts do not enable an entity to become an Institutional
Partner. Once an institution becomes eligible for Institutional Partnership,
the Core Team must nominate and approve the Partnership.

If an existing Institutional Partner no longer has a contributing employee,
they will be given a one-year grace period for other employees to begin
contributing.

An Institutional Partner is free to pursue funding for their work on The
Project through any legal means. This could involve a non-profit organization
raising money from private foundations and donors or a for-profit company
building proprietary products and services that leverage Project Software and
Services. Funding acquired by Institutional Partners to work on The Project is
called Institutional Funding. However, no funding obtained by an Institutional
Partner can override The Project BDFL and Core Team. If a Partner has funding
to do pandas work and the Core Team decides to not pursue that work as a
project, the Partner is free to pursue it on their own.
However, in this
situation, that part of the Partner’s work will not be under the pandas
umbrella and cannot use the Project trademarks in a way that suggests a formal
relationship.

To acknowledge institutional contributions, there are two levels of
Institutional Partners, with associated benefits:

**Tier 1** = an institution with at least one Institutional Core Team Member

- Acknowledged on the pandas website, in talks and T-shirts.
- Ability to acknowledge their own funding sources on the pandas website, in
  talks and T-shirts.
- Ability to influence the project through the participation of their Core Team
  Member.

**Tier 2** = an institution with at least one Institutional Contributor

## Breach

Non-compliance with the terms of the governance documents shall be reported to
the Core Team either through public or private channels as deemed appropriate.

## Changing the Governance

Changes to the governance are submitted via a GitHub pull request to The Project's
[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md).
The pull request is then refined in response to public comment and review, with
the goal being consensus in the community. After this open period, a Core Team
Member proposes to the Core Team that the changes be ratified and the pull
request merged (accepting the proposed changes) or proposes that the pull
request be closed without merging (rejecting the proposed changes). The Member
should state the final commit hash in the pull request being proposed for
acceptance or rejection and briefly summarize the pull request. A minimum of
80% of the Core Team must vote and at least 2/3 of the votes must be positive
to carry out the proposed action (fractions of a vote rounded up to the nearest
integer). Since the BDFL holds ultimate authority in The Project, the BDFL has
authority to act alone in accepting or rejecting changes or overriding Core
Team decisions.

From e990922f2c11997dc3f17cd1da02621014179222 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:24 -0500
Subject: [PATCH 046/184] New translations index.md (French)

---
 web/pandas/fr/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/fr/about/index.md

diff --git a/web/pandas/fr/about/index.md b/web/pandas/fr/about/index.md
new file mode 100644
index 000000000..5431693ca
--- /dev/null
+++ b/web/pandas/fr/about/index.md
@@ -0,0 +1,86 @@
# À propos de pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data;

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

From 424e922759a818b96a704f37c6349555faaf1084 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:25 -0500
Subject: [PATCH 047/184] New translations index.md (Arabic)

---
 web/pandas/ar/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/ar/about/index.md

diff --git a/web/pandas/ar/about/index.md b/web/pandas/ar/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/ar/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data;

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

(A short, runnable example touching several of these features follows at the
end of this page.)

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

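To make the highlights listed above concrete, here is a minimal, self-contained
sketch (the data, column names and numbers are invented for illustration)
showing CSV reading, missing-data handling, label-based selection and a
group-by aggregation:

```python
import io

import pandas as pd

# Read CSV data into a DataFrame; the empty field becomes a missing value (NaN).
csv = io.StringIO(
    "city,year,sales\n"
    "Paris,2023,10.5\n"
    "Paris,2024,12.0\n"
    "Cairo,2023,\n"
    "Cairo,2024,9.5\n"
)
df = pd.read_csv(csv)

# Label-based subsetting: rows for 2024, two named columns.
recent = df.loc[df["year"] == 2024, ["city", "sales"]]

# Split-apply-combine with the group by engine; NaN is skipped by default.
totals = df.groupby("city")["sales"].sum()

print(recent)
print(totals)
```

Run as-is, this prints the 2024 rows and per-city totals, with the missing
Cairo 2023 value excluded from the sum.
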
From 35a1e7ce5b1911bac5fa8093b677a11c6847e412 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:27 -0500
Subject: [PATCH 048/184] New translations index.md (Catalan)

---
 web/pandas/ca/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/ca/about/index.md

diff --git a/web/pandas/ca/about/index.md b/web/pandas/ca/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/ca/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data;

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

From 7ef01795ca211977a036ea009a96fd58a6f7e51f Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:28 -0500
Subject: [PATCH 049/184] New translations index.md (Japanese)

---
 web/pandas/ja/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/ja/about/index.md

diff --git a/web/pandas/ja/about/index.md b/web/pandas/ja/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/ja/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data;

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

From b5f00f4ce3cbfb85fdb3fc154bc43095352d8e78 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:29 -0500
Subject: [PATCH 050/184] New translations index.md (Korean)

---
 web/pandas/ko/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/ko/about/index.md

diff --git a/web/pandas/ko/about/index.md b/web/pandas/ko/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/ko/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data;

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

From 3500c18101cef4e0b0be67201282acad565f8a65 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:30 -0500
Subject: [PATCH 051/184] New translations index.md (Polish)

---
 web/pandas/pl/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/pl/about/index.md

diff --git a/web/pandas/pl/about/index.md b/web/pandas/pl/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/pl/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data (a short sketch of these features
  appears at the end of this page);

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

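As a concrete companion to the time series bullet above, here is a small
illustrative sketch (the dates, frequency and window size are arbitrary)
covering date range generation, frequency conversion, moving window statistics
and lagging:

```python
import numpy as np
import pandas as pd

# Date range generation: ten consecutive days.
idx = pd.date_range("2024-01-01", periods=10, freq="D")
ts = pd.Series(np.arange(10.0), index=idx)

weekly = ts.resample("W").mean()       # frequency conversion (downsampling)
rolling = ts.rolling(window=3).mean()  # moving window statistics
lagged = ts.shift(1)                   # date shifting / lagging

print(weekly)
print(rolling.tail(3))
print(lagged.head(3))
```

The `resample`, `rolling` and `shift` calls map one-to-one onto the frequency
conversion, moving window and lagging items named in that bullet.
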
From af9442fc20f275d7b4932f4301cb9c2ce46c6be3 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:32 -0500
Subject: [PATCH 052/184] New translations index.md (Russian)

---
 web/pandas/ru/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/ru/about/index.md

diff --git a/web/pandas/ru/about/index.md b/web/pandas/ru/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/ru/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data;

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

From b70f49e96bbf19eb9c7bfd42b93c0cf39d3edfec Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:33 -0500
Subject: [PATCH 053/184] New translations index.md (Chinese Simplified)

---
 web/pandas/zh/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/zh/about/index.md

diff --git a/web/pandas/zh/about/index.md b/web/pandas/zh/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/zh/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

### Timeline

- **2008**: Development of _pandas_ started
- **2009**: _pandas_ becomes open source
- **2012**: First edition of _Python for Data Analysis_ is published
- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
- **2018**: First in-person core developer sprint

## Library Highlights

- A fast and efficient **DataFrame** object for data manipulation with
  integrated indexing;

- Tools for **reading and writing data** between in-memory data structures and
  different formats: CSV and text files, Microsoft Excel, SQL databases, and
  the fast HDF5 format;

- Intelligent **data alignment** and integrated handling of **missing data**:
  gain automatic label-based alignment in computations and easily manipulate
  messy data into an orderly form;

- Flexible **reshaping** and pivoting of data sets;

- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
  of large data sets;

- Columns can be inserted and deleted from data structures for **size
  mutability**;

- Aggregating or transforming data with a powerful **group by** engine
  allowing split-apply-combine operations on data sets;

- High performance **merging and joining** of data sets;

- **Hierarchical axis indexing** provides an intuitive way of working with
  high-dimensional data in a lower-dimensional data structure;

- **Time series**-functionality: date range generation and frequency
  conversion, moving window statistics, date shifting and lagging.
  Even create domain-specific time offsets and join time
  series without losing data;

- Highly **optimized for performance**, with critical code paths written in
  [Cython](https://cython.org) or C.

- Python with _pandas_ is in use in a wide variety of **academic and
  commercial** domains, including Finance, Neuroscience, Economics,
  Statistics, Advertising, Web Analytics, and more.

## Mission

_pandas_ aims to be the fundamental high-level building block for doing practical,
real world data analysis in Python.
Additionally, it has the broader goal of becoming the most powerful and flexible
open source data analysis / manipulation tool available in any language.

## Vision

A world where data analytics and manipulation software is:

- Accessible to everyone
- Free for users to use and modify
- Flexible
- Powerful
- Easy to use
- Fast

## Values

It is at the core of _pandas_ to be respectful and welcoming to everybody:
users, contributors and the broader community, regardless of level of experience,
gender, gender identity and expression, sexual orientation, disability,
personal appearance, body size, race, ethnicity, age, religion, or nationality.

From 46ee99ed59386c631a678e2c53194c89d21c68bf Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:34 -0500
Subject: [PATCH 054/184] New translations index.md (Persian)

---
 web/pandas/fa/about/index.md | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 web/pandas/fa/about/index.md

diff --git a/web/pandas/fa/about/index.md b/web/pandas/fa/about/index.md
new file mode 100644
index 000000000..deb04a9c2
--- /dev/null
+++ b/web/pandas/fa/about/index.md
@@ -0,0 +1,86 @@
# About pandas

## History of development

In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com).
By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source),
and is actively supported today by a community of like-minded individuals around the world who
contribute their valuable time and energy to help make open source _pandas_
possible. Thank you to [all of our contributors](team.html).

Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
This will help ensure the successful development of _pandas_ as a world-class open-source project.

+ +### Timeline + +- **2008**: Development of _pandas_ started +- **2009**: _pandas_ becomes open source +- **2012**: First edition of _Python for Data Analysis_ is published +- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects) +- **2018**: First in-person core developer sprint + +## Library Highlights + +- A fast and efficient **DataFrame** object for data manipulation with + integrated indexing; + +- Tools for **reading and writing data** between in-memory data structures and + different formats: CSV and text files, Microsoft Excel, SQL databases, and + the fast HDF5 format; + +- Intelligent **data alignment** and integrated handling of **missing data**: + gain automatic label-based alignment in computations and easily manipulate + messy data into an orderly form; + +- Flexible **reshaping** and pivoting of data sets; + +- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets; + +- Columns can be inserted and deleted from data structures for **size + mutability**; + +- Aggregating or transforming data with a powerful **group by** engine + allowing split-apply-combine operations on data sets; + +- High performance **merging and joining** of data sets; + +- **Hierarchical axis indexing** provides an intuitive way of working with + high-dimensional data in a lower-dimensional data structure; + +- **Time series**-functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. + Even create domain-specific time offsets and join time + series without losing data; + +- Highly **optimized for performance**, with critical code paths written in + [Cython](https://cython.org) or C. + +- Python with _pandas_ is in use in a wide variety of **academic and + commercial** domains, including Finance, Neuroscience, Economics, + Statistics, Advertising, Web Analytics, and more. + +## Mission + +_pandas_ aims to be the fundamental high-level building block for doing practical, +real world data analysis in Python. +Additionally, it has the broader goal of becoming the most powerful and flexible +open source data analysis / manipulation tool available in any language. + +## Vision + +A world where data analytics and manipulation software is: + +- Accessible to everyone +- Free for users to use and modify +- Flexible +- Powerful +- Easy to use +- Fast + +## Values + +Is in the core of _pandas_ to be respectful and welcoming with everybody, +users, contributors and the broader community. Regardless of level of experience, +gender, gender identity and expression, sexual orientation, disability, +personal appearance, body size, race, ethnicity, age, religion, or nationality. From e2b0cece8d3b8631a3c15eac1092dc3b619d0480 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:36 -0500 Subject: [PATCH 055/184] New translations index.md (Tamil) --- web/pandas/ta/about/index.md | 86 ++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 web/pandas/ta/about/index.md diff --git a/web/pandas/ta/about/index.md b/web/pandas/ta/about/index.md new file mode 100644 index 000000000..deb04a9c2 --- /dev/null +++ b/web/pandas/ta/about/index.md @@ -0,0 +1,86 @@ +# About pandas + +## History of development + +In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com). 
+By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source), +and is actively supported today by a community of like-minded individuals around the world who +contribute their valuable time and energy to help make open source _pandas_ +possible. Thank you to [all of our contributors](team.html). + +Since 2015, _pandas_ is a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects). +This will help ensure the success of development of _pandas_ as a world-class open-source project. + +### Timeline + +- **2008**: Development of _pandas_ started +- **2009**: _pandas_ becomes open source +- **2012**: First edition of _Python for Data Analysis_ is published +- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects) +- **2018**: First in-person core developer sprint + +## Library Highlights + +- A fast and efficient **DataFrame** object for data manipulation with + integrated indexing; + +- Tools for **reading and writing data** between in-memory data structures and + different formats: CSV and text files, Microsoft Excel, SQL databases, and + the fast HDF5 format; + +- Intelligent **data alignment** and integrated handling of **missing data**: + gain automatic label-based alignment in computations and easily manipulate + messy data into an orderly form; + +- Flexible **reshaping** and pivoting of data sets; + +- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets; + +- Columns can be inserted and deleted from data structures for **size + mutability**; + +- Aggregating or transforming data with a powerful **group by** engine + allowing split-apply-combine operations on data sets; + +- High performance **merging and joining** of data sets; + +- **Hierarchical axis indexing** provides an intuitive way of working with + high-dimensional data in a lower-dimensional data structure; + +- **Time series**-functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. + Even create domain-specific time offsets and join time + series without losing data; + +- Highly **optimized for performance**, with critical code paths written in + [Cython](https://cython.org) or C. + +- Python with _pandas_ is in use in a wide variety of **academic and + commercial** domains, including Finance, Neuroscience, Economics, + Statistics, Advertising, Web Analytics, and more. + +## Mission + +_pandas_ aims to be the fundamental high-level building block for doing practical, +real world data analysis in Python. +Additionally, it has the broader goal of becoming the most powerful and flexible +open source data analysis / manipulation tool available in any language. + +## Vision + +A world where data analytics and manipulation software is: + +- Accessible to everyone +- Free for users to use and modify +- Flexible +- Powerful +- Easy to use +- Fast + +## Values + +Is in the core of _pandas_ to be respectful and welcoming with everybody, +users, contributors and the broader community. Regardless of level of experience, +gender, gender identity and expression, sexual orientation, disability, +personal appearance, body size, race, ethnicity, age, religion, or nationality. 
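+
+As a minimal, illustrative sketch of the **time series** functionality
+highlighted above (the dates and values are invented for this example):
+
+```python
+import numpy as np
+import pandas as pd
+
+# Hypothetical hourly readings over two days.
+idx = pd.date_range("2024-01-01", periods=48, freq="h")
+ts = pd.Series(np.arange(48.0), index=idx)
+
+# Frequency conversion: downsample the hourly data to daily means.
+daily = ts.resample("D").mean()
+
+# Moving window statistics: a 6-hour rolling average.
+smooth = ts.rolling(window=6).mean()
+print(daily.head(), smooth.tail(3), sep="\n")
+```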
From 0d59a659b362516a6959f515ca6db677112be7a4 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:37 -0500 Subject: [PATCH 056/184] New translations index.md (Hindi) --- web/pandas/hi/about/index.md | 86 ++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 web/pandas/hi/about/index.md diff --git a/web/pandas/hi/about/index.md b/web/pandas/hi/about/index.md new file mode 100644 index 000000000..deb04a9c2 --- /dev/null +++ b/web/pandas/hi/about/index.md @@ -0,0 +1,86 @@ +# About pandas + +## History of development + +In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com). +By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source), +and is actively supported today by a community of like-minded individuals around the world who +contribute their valuable time and energy to help make open source _pandas_ +possible. Thank you to [all of our contributors](team.html). + +Since 2015, _pandas_ is a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects). +This will help ensure the success of development of _pandas_ as a world-class open-source project. + +### Timeline + +- **2008**: Development of _pandas_ started +- **2009**: _pandas_ becomes open source +- **2012**: First edition of _Python for Data Analysis_ is published +- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects) +- **2018**: First in-person core developer sprint + +## Library Highlights + +- A fast and efficient **DataFrame** object for data manipulation with + integrated indexing; + +- Tools for **reading and writing data** between in-memory data structures and + different formats: CSV and text files, Microsoft Excel, SQL databases, and + the fast HDF5 format; + +- Intelligent **data alignment** and integrated handling of **missing data**: + gain automatic label-based alignment in computations and easily manipulate + messy data into an orderly form; + +- Flexible **reshaping** and pivoting of data sets; + +- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets; + +- Columns can be inserted and deleted from data structures for **size + mutability**; + +- Aggregating or transforming data with a powerful **group by** engine + allowing split-apply-combine operations on data sets; + +- High performance **merging and joining** of data sets; + +- **Hierarchical axis indexing** provides an intuitive way of working with + high-dimensional data in a lower-dimensional data structure; + +- **Time series**-functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. + Even create domain-specific time offsets and join time + series without losing data; + +- Highly **optimized for performance**, with critical code paths written in + [Cython](https://cython.org) or C. + +- Python with _pandas_ is in use in a wide variety of **academic and + commercial** domains, including Finance, Neuroscience, Economics, + Statistics, Advertising, Web Analytics, and more. + +## Mission + +_pandas_ aims to be the fundamental high-level building block for doing practical, +real world data analysis in Python. +Additionally, it has the broader goal of becoming the most powerful and flexible +open source data analysis / manipulation tool available in any language. 
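+
+As a small, illustrative sketch of the label-based **data alignment** and
+**missing data** handling highlighted above (the labels and values are
+invented for this example):
+
+```python
+import pandas as pd
+
+# Two series with only partially overlapping labels (invented data).
+s1 = pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"])
+s2 = pd.Series([10.0, 20.0], index=["b", "c"])
+
+# Arithmetic aligns on labels automatically; unmatched labels produce NaN.
+total = s1 + s2
+print(total)  # "a" is NaN because it is missing from s2
+
+# Integrated missing-data handling.
+print(total.fillna(0.0))
+```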
+
+## Vision
+
+A world where data analytics and manipulation software is:
+
+- Accessible to everyone
+- Free for users to use and modify
+- Flexible
+- Powerful
+- Easy to use
+- Fast
+
+## Values
+
+It is at the core of _pandas_ to be respectful and welcoming to everybody:
+users, contributors, and the broader community, regardless of level of experience,
+gender, gender identity and expression, sexual orientation, disability,
+personal appearance, body size, race, ethnicity, age, religion, or nationality.

From f6174d9560e9cac84c415a80f237466048a47437 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:38 -0500
Subject: [PATCH 057/184] New translations roadmap.md (French)

---
 web/pandas/fr/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 web/pandas/fr/about/roadmap.md

diff --git a/web/pandas/fr/about/roadmap.md b/web/pandas/fr/about/roadmap.md
new file mode 100644
index 000000000..bd52e545a
--- /dev/null
+++ b/web/pandas/fr/about/roadmap.md
@@ -0,0 +1,197 @@
+# Roadmap
+
+This page provides an overview of the major themes in pandas'
+development. Each of these items requires a relatively large amount of
+effort to implement. These may be achieved more quickly with dedicated
+funding or interest from contributors.
+
+An item being on the roadmap does not mean that it will _necessarily_
+happen, even with unlimited funding. During the implementation period we
+may discover issues preventing the adoption of the feature.
+
+Additionally, an item _not_ being on the roadmap does not exclude it
+from inclusion in pandas. The roadmap is intended for larger,
+fundamental changes to the project that are likely to take months or
+years of developer time. Smaller-scoped items will continue to be
+tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues).
+
+The roadmap is defined as a set of major enhancement proposals named PDEPs.
+For more information about PDEPs, and how to submit one, please refer to
+[PDEP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html).
+
+## PDEPs
+
+{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %}
+

+<p>
+  <details>
+   <summary>{{ pdep_type.replace("_", " ").capitalize() }}</summary>
+   <ul>
+   {% for pdep in pdeps[pdep_type] %}
+        <li>{{ pdep.title }}</li>
+   {% else %}
+        <li>There are currently no PDEPs with this status</li>
+   {% endfor %}
+   </ul>
+  </details>
+</p>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+   For instance, it is OK to have some internal method of `.loc` call some
+   internal method of `__getitem__` (or of their common base class),
+   but never in the code flow of `.loc` should `the_obj[something]` appear.

3. Execution of positional indexing must never involve labels (as currently, sadly, happens).
   That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed)
   to `.iloc` should never involve the axes of the object in any way.

4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once.
   The following steps must hence be clearly decoupled:

- find positions we need to access/modify on each axis
- (if we are accessing) derive the type of object we need to return (dimensionality)
- actually access/modify the values
- (if we are accessing) construct the return object

5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals with how data is stored
   (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types)
   must be independent from code that deals with identifying affected rows/columns,
   and take place only once step 4.i is completed.

- In particular, such code should most probably not live in `pandas/core/indexing.py`
- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases)

6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup,
   on the one side, and for any required conversion/adaptation/lookup of label(s), on the other.

7. Use of trial and error should be limited, and anyway restricted to catch only exceptions
   which are actually expected (typically `KeyError`).

- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... except`

8. Any code portion which is not specific to setters and getters must be shared,
   and when small differences in behavior are expected (e.g. getting with `.loc` raises for
   missing labels, setting still doesn't), they can be managed with a specific parameter.

### Numba-accelerated operations

[Numba](https://numba.pydata.org) is a JIT compiler for Python code.
We'd like to provide ways for users to apply their own Numba-jitted
functions where pandas accepts user-defined functions (for example,
`Series.apply`,
`DataFrame.apply`,
`DataFrame.applymap`, and in groupby and
window contexts). This will improve the performance of
user-defined functions in these operations by staying within compiled
code.

### Documentation improvements

We'd like to improve the content, structure, and presentation of the
pandas documentation. Some specific goals include

- Overhaul the HTML theme with a modern, responsive design
  (`15556`)
- Improve the "Getting Started" documentation, designing and writing
  learning paths for users from different backgrounds (e.g. brand new to
  programming, familiar with other languages like R, already familiar
  with Python).
- Improve the overall organization of the documentation and specific
  subsections of the documentation to make navigation and finding
  content easier.

### Performance monitoring

Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/)
to monitor for performance regressions.
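+
+As a minimal sketch of what an ASV benchmark looks like (the class name,
+data, and sizes are invented for illustration; `setup` and `time_`-prefixed
+methods follow ASV's benchmark conventions):
+
+```python
+import numpy as np
+import pandas as pd
+
+
+class GroupBySum:
+    # ASV calls setup() before timing each benchmark method.
+    def setup(self):
+        n = 100_000
+        self.df = pd.DataFrame(
+            {
+                "key": np.random.randint(0, 100, size=n),
+                "value": np.random.rand(n),
+            }
+        )
+
+    # Methods prefixed with `time_` are the ones ASV measures.
+    def time_groupby_sum(self):
+        self.df.groupby("key")["value"].sum()
+```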
+ASV itself is a fabulous tool,
+but requires some additional work to be integrated into an open source
+project's workflow.
+
+The [asv-runner](https://github.com/asv-runner) organization, currently
+made up of pandas maintainers, provides tools built on top of ASV. We
+have a physical machine for running a number of projects' benchmarks,
+and tools managing the benchmark runs and reporting on results.
+
+We'd like to fund improvements and maintenance of these tools to
+
+- Be more stable. Currently, they're maintained on the nights and
+  weekends when a maintainer has free time.
+- Tune the system for benchmarks to improve stability, following
+
+- Build a GitHub bot to request ASV runs _before_ a PR is merged.
+  Currently, the benchmarks are only run nightly.

From bb85c641ae62f9967728513a2636475e5ab9c408 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:40 -0500
Subject: [PATCH 058/184] New translations roadmap.md (Arabic)

---
 web/pandas/ar/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 web/pandas/ar/about/roadmap.md

diff --git a/web/pandas/ar/about/roadmap.md b/web/pandas/ar/about/roadmap.md
new file mode 100644
index 000000000..bd52e545a
--- /dev/null
+++ b/web/pandas/ar/about/roadmap.md
@@ -0,0 +1,197 @@
+# Roadmap
+
+This page provides an overview of the major themes in pandas'
+development. Each of these items requires a relatively large amount of
+effort to implement. These may be achieved more quickly with dedicated
+funding or interest from contributors.
+
+An item being on the roadmap does not mean that it will _necessarily_
+happen, even with unlimited funding. During the implementation period we
+may discover issues preventing the adoption of the feature.
+
+Additionally, an item _not_ being on the roadmap does not exclude it
+from inclusion in pandas. The roadmap is intended for larger,
+fundamental changes to the project that are likely to take months or
+years of developer time. Smaller-scoped items will continue to be
+tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues).
+
+The roadmap is defined as a set of major enhancement proposals named PDEPs.
+For more information about PDEPs, and how to submit one, please refer to
+[PDEP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html).
+
+## PDEPs
+
+{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %}
+

+<p>
+  <details>
+   <summary>{{ pdep_type.replace("_", " ").capitalize() }}</summary>
+   <ul>
+   {% for pdep in pdeps[pdep_type] %}
+        <li>{{ pdep.title }}</li>
+   {% else %}
+        <li>There are currently no PDEPs with this status</li>
+   {% endfor %}
+   </ul>
+  </details>
+</p>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
+ASV itself is a fabulous tool,
+but requires some additional work to be integrated into an open source
+project's workflow.
+
+The [asv-runner](https://github.com/asv-runner) organization, currently
+made up of pandas maintainers, provides tools built on top of ASV. We
+have a physical machine for running a number of projects' benchmarks,
+and tools managing the benchmark runs and reporting on results.
+
+We'd like to fund improvements and maintenance of these tools to
+
+- Be more stable. Currently, they're maintained on the nights and
+  weekends when a maintainer has free time.
+- Tune the system for benchmarks to improve stability, following
+
+- Build a GitHub bot to request ASV runs _before_ a PR is merged.
+  Currently, the benchmarks are only run nightly.

From fa703ffa993253738189300d72240c6018d4d527 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:41 -0500
Subject: [PATCH 059/184] New translations roadmap.md (Catalan)

---
 web/pandas/ca/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 web/pandas/ca/about/roadmap.md

diff --git a/web/pandas/ca/about/roadmap.md b/web/pandas/ca/about/roadmap.md
new file mode 100644
index 000000000..bd52e545a
--- /dev/null
+++ b/web/pandas/ca/about/roadmap.md
@@ -0,0 +1,197 @@
+# Roadmap
+
+This page provides an overview of the major themes in pandas'
+development. Each of these items requires a relatively large amount of
+effort to implement. These may be achieved more quickly with dedicated
+funding or interest from contributors.
+
+An item being on the roadmap does not mean that it will _necessarily_
+happen, even with unlimited funding. During the implementation period we
+may discover issues preventing the adoption of the feature.
+
+Additionally, an item _not_ being on the roadmap does not exclude it
+from inclusion in pandas. The roadmap is intended for larger,
+fundamental changes to the project that are likely to take months or
+years of developer time. Smaller-scoped items will continue to be
+tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues).
+
+The roadmap is defined as a set of major enhancement proposals named PDEPs.
+For more information about PDEPs, and how to submit one, please refer to
+[PDEP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html).
+
+## PDEPs
+
+{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %}
+

+<p>
+  <details>
+   <summary>{{ pdep_type.replace("_", " ").capitalize() }}</summary>
+   <ul>
+   {% for pdep in pdeps[pdep_type] %}
+        <li>{{ pdep.title }}</li>
+   {% else %}
+        <li>There are currently no PDEPs with this status</li>
+   {% endfor %}
+   </ul>
+  </details>
+</p>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
+ASV itself is a fabulous tool,
+but requires some additional work to be integrated into an open source
+project's workflow.
+
+The [asv-runner](https://github.com/asv-runner) organization, currently
+made up of pandas maintainers, provides tools built on top of ASV. We
+have a physical machine for running a number of projects' benchmarks,
+and tools managing the benchmark runs and reporting on results.
+
+We'd like to fund improvements and maintenance of these tools to
+
+- Be more stable. Currently, they're maintained on the nights and
+  weekends when a maintainer has free time.
+- Tune the system for benchmarks to improve stability, following
+
+- Build a GitHub bot to request ASV runs _before_ a PR is merged.
+  Currently, the benchmarks are only run nightly.

From 8d6faac9398e6799a07f719c6fa0e0df84828afb Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:42 -0500
Subject: [PATCH 060/184] New translations roadmap.md (Japanese)

---
 web/pandas/ja/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 web/pandas/ja/about/roadmap.md

diff --git a/web/pandas/ja/about/roadmap.md b/web/pandas/ja/about/roadmap.md
new file mode 100644
index 000000000..bd52e545a
--- /dev/null
+++ b/web/pandas/ja/about/roadmap.md
@@ -0,0 +1,197 @@
+# Roadmap
+
+This page provides an overview of the major themes in pandas'
+development. Each of these items requires a relatively large amount of
+effort to implement. These may be achieved more quickly with dedicated
+funding or interest from contributors.
+
+An item being on the roadmap does not mean that it will _necessarily_
+happen, even with unlimited funding. During the implementation period we
+may discover issues preventing the adoption of the feature.
+
+Additionally, an item _not_ being on the roadmap does not exclude it
+from inclusion in pandas. The roadmap is intended for larger,
+fundamental changes to the project that are likely to take months or
+years of developer time. Smaller-scoped items will continue to be
+tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues).
+
+The roadmap is defined as a set of major enhancement proposals named PDEPs.
+For more information about PDEPs, and how to submit one, please refer to
+[PDEP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html).
+
+## PDEPs
+
+{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %}
+

+<p>
+  <details>
+   <summary>{{ pdep_type.replace("_", " ").capitalize() }}</summary>
+   <ul>
+   {% for pdep in pdeps[pdep_type] %}
+        <li>{{ pdep.title }}</li>
+   {% else %}
+        <li>There are currently no PDEPs with this status</li>
+   {% endfor %}
+   </ul>
+  </details>
+</p>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
+ASV itself is a fabulous tool,
+but requires some additional work to be integrated into an open source
+project's workflow.
+
+The [asv-runner](https://github.com/asv-runner) organization, currently
+made up of pandas maintainers, provides tools built on top of ASV. We
+have a physical machine for running a number of projects' benchmarks,
+and tools managing the benchmark runs and reporting on results.
+
+We'd like to fund improvements and maintenance of these tools to
+
+- Be more stable. Currently, they're maintained on the nights and
+  weekends when a maintainer has free time.
+- Tune the system for benchmarks to improve stability, following
+
+- Build a GitHub bot to request ASV runs _before_ a PR is merged.
+  Currently, the benchmarks are only run nightly.

From c717514a1c3cbb498748a0edd474b449c0a92ce6 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:44 -0500
Subject: [PATCH 061/184] New translations roadmap.md (Korean)

---
 web/pandas/ko/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 web/pandas/ko/about/roadmap.md

diff --git a/web/pandas/ko/about/roadmap.md b/web/pandas/ko/about/roadmap.md
new file mode 100644
index 000000000..bd52e545a
--- /dev/null
+++ b/web/pandas/ko/about/roadmap.md
@@ -0,0 +1,197 @@
+# Roadmap
+
+This page provides an overview of the major themes in pandas'
+development. Each of these items requires a relatively large amount of
+effort to implement. These may be achieved more quickly with dedicated
+funding or interest from contributors.
+
+An item being on the roadmap does not mean that it will _necessarily_
+happen, even with unlimited funding. During the implementation period we
+may discover issues preventing the adoption of the feature.
+
+Additionally, an item _not_ being on the roadmap does not exclude it
+from inclusion in pandas. The roadmap is intended for larger,
+fundamental changes to the project that are likely to take months or
+years of developer time. Smaller-scoped items will continue to be
+tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues).
+
+The roadmap is defined as a set of major enhancement proposals named PDEPs.
+For more information about PDEPs, and how to submit one, please refer to
+[PDEP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html).
+
+## PDEPs
+
+{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %}
+

+<p>
+  <details>
+   <summary>{{ pdep_type.replace("_", " ").capitalize() }}</summary>
+   <ul>
+   {% for pdep in pdeps[pdep_type] %}
+        <li>{{ pdep.title }}</li>
+   {% else %}
+        <li>There are currently no PDEPs with this status</li>
+   {% endfor %}
+   </ul>
+  </details>
+</p>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
From 814a31ce99c346dadac13139a58013380c66bbb0 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:45 -0500
Subject: [PATCH 062/184] New translations roadmap.md (Polish)
---
 web/pandas/pl/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 web/pandas/pl/about/roadmap.md

diff --git a/web/pandas/pl/about/roadmap.md b/web/pandas/pl/about/roadmap.md
new file mode 100644
index 000000000..bd52e545a
--- /dev/null
+++ b/web/pandas/pl/about/roadmap.md
@@ -0,0 +1,197 @@
+# Roadmap
+
+This page provides an overview of the major themes in pandas'
+development. Each of these items requires a relatively large amount of
+effort to implement. These may be achieved more quickly with dedicated
+funding or interest from contributors.
+
+An item being on the roadmap does not mean that it will _necessarily_
+happen, even with unlimited funding. During the implementation period we
+may discover issues preventing the adoption of the feature.
+
+Additionally, an item _not_ being on the roadmap does not exclude it
+from inclusion in pandas. The roadmap is intended for larger,
+fundamental changes to the project that are likely to take months or
+years of developer time. Smaller-scoped items will continue to be
+tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues).
+
+The roadmap is defined as a set of major enhancement proposals named PDEPs.
+For more information about PDEPs, and how to submit one, please refer to
+[PDEP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html).
+
+## PDEPs
+
+{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %}
+

+<h3>{{ pdep_type.replace("_", " ").capitalize() }}</h3>
+
+<ul>
+{% for pdep in pdeps[pdep_type] %}
+<li><a href="{{ base_url }}pdeps/{{ pdep.file }}">{{ pdep.title }}</a></li>
+{% else %}
+<li>There are currently no PDEPs with this status</li>
+{% endfor %}
+</ul>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
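+
+(Illustration ours, not part of the original roadmap.) ASV benchmarks
+can also be parameterized; each `time_*` method is run once per value
+in `params`:
+
+```python
+import numpy as np
+import pandas as pd
+
+
+class SeriesSum:
+    params = [10_000, 1_000_000]
+    param_names = ["n"]
+
+    def setup(self, n):
+        self.s = pd.Series(np.arange(n, dtype="float64"))
+
+    def time_sum(self, n):
+        self.s.sum()
+```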
ASV itself is a fabulous tool, +but requires some additional work to be integrated into an open source +project's workflow. + +The [asv-runner](https://github.com/asv-runner) organization, currently +made up of pandas maintainers, provides tools built on top of ASV. We +have a physical machine for running a number of project's benchmarks, +and tools managing the benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +- Be more stable. Currently, they're maintained on the nights and + weekends when a maintainer has free time. +- Tune the system for benchmarks to improve stability, following + +- Build a GitHub bot to request ASV runs _before_ a PR is merged. + Currently, the benchmarks are only run nightly. From 182aa4c1c678b3c2c050b7115f3f08caa65d0a19 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:47 -0500 Subject: [PATCH 063/184] New translations roadmap.md (Russian) --- web/pandas/ru/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 web/pandas/ru/about/roadmap.md diff --git a/web/pandas/ru/about/roadmap.md b/web/pandas/ru/about/roadmap.md new file mode 100644 index 000000000..bd52e545a --- /dev/null +++ b/web/pandas/ru/about/roadmap.md @@ -0,0 +1,197 @@ +# Roadmap + +This page provides an overview of the major themes in pandas' +development. Each of these items requires a relatively large amount of +effort to implement. These may be achieved more quickly with dedicated +funding or interest from contributors. + +An item being on the roadmap does not mean that it will _necessarily_ +happen, even with unlimited funding. During the implementation period we +may discover issues preventing the adoption of the feature. + +Additionally, an item _not_ being on the roadmap does not exclude it +from inclusion in pandas. The roadmap is intended for larger, +fundamental changes to the project that are likely to take months or +years of developer time. Smaller-scoped items will continue to be +tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues). + +The roadmap is defined as a set of major enhancement proposals named PDEPs. +For more information about PDEPs, and how to submit one, please refer to +[PEDP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html). + +## PDEPs + +{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %} + +

+<h3>{{ pdep_type.replace("_", " ").capitalize() }}</h3>
+
+<ul>
+{% for pdep in pdeps[pdep_type] %}
+<li><a href="{{ base_url }}pdeps/{{ pdep.file }}">{{ pdep.title }}</a></li>
+{% else %}
+<li>There are currently no PDEPs with this status</li>
+{% endfor %}
+</ul>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
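+
+(Illustration ours.) Besides `time_*` timings, ASV can also track peak
+memory usage through `peakmem_*` methods:
+
+```python
+import numpy as np
+import pandas as pd
+
+
+class ConcatFrames:
+    def setup(self):
+        self.frames = [pd.DataFrame(np.ones((1_000, 10))) for _ in range(100)]
+
+    def peakmem_concat(self):
+        pd.concat(self.frames)
+```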
ASV itself is a fabulous tool, +but requires some additional work to be integrated into an open source +project's workflow. + +The [asv-runner](https://github.com/asv-runner) organization, currently +made up of pandas maintainers, provides tools built on top of ASV. We +have a physical machine for running a number of project's benchmarks, +and tools managing the benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +- Be more stable. Currently, they're maintained on the nights and + weekends when a maintainer has free time. +- Tune the system for benchmarks to improve stability, following + +- Build a GitHub bot to request ASV runs _before_ a PR is merged. + Currently, the benchmarks are only run nightly. From 59bdaac1e4b72c1787848ba3f71262cf920b9556 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:48 -0500 Subject: [PATCH 064/184] New translations roadmap.md (Chinese Simplified) --- web/pandas/zh/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 web/pandas/zh/about/roadmap.md diff --git a/web/pandas/zh/about/roadmap.md b/web/pandas/zh/about/roadmap.md new file mode 100644 index 000000000..bd52e545a --- /dev/null +++ b/web/pandas/zh/about/roadmap.md @@ -0,0 +1,197 @@ +# Roadmap + +This page provides an overview of the major themes in pandas' +development. Each of these items requires a relatively large amount of +effort to implement. These may be achieved more quickly with dedicated +funding or interest from contributors. + +An item being on the roadmap does not mean that it will _necessarily_ +happen, even with unlimited funding. During the implementation period we +may discover issues preventing the adoption of the feature. + +Additionally, an item _not_ being on the roadmap does not exclude it +from inclusion in pandas. The roadmap is intended for larger, +fundamental changes to the project that are likely to take months or +years of developer time. Smaller-scoped items will continue to be +tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues). + +The roadmap is defined as a set of major enhancement proposals named PDEPs. +For more information about PDEPs, and how to submit one, please refer to +[PEDP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html). + +## PDEPs + +{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %} + +

+<h3>{{ pdep_type.replace("_", " ").capitalize() }}</h3>
+
+<ul>
+{% for pdep in pdeps[pdep_type] %}
+<li><a href="{{ base_url }}pdeps/{{ pdep.file }}">{{ pdep.title }}</a></li>
+{% else %}
+<li>There are currently no PDEPs with this status</li>
+{% endfor %}
+</ul>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
ASV itself is a fabulous tool, +but requires some additional work to be integrated into an open source +project's workflow. + +The [asv-runner](https://github.com/asv-runner) organization, currently +made up of pandas maintainers, provides tools built on top of ASV. We +have a physical machine for running a number of project's benchmarks, +and tools managing the benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +- Be more stable. Currently, they're maintained on the nights and + weekends when a maintainer has free time. +- Tune the system for benchmarks to improve stability, following + +- Build a GitHub bot to request ASV runs _before_ a PR is merged. + Currently, the benchmarks are only run nightly. From abf8962bb40c8ef0d84e550e7fdcda13feedb821 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:49 -0500 Subject: [PATCH 065/184] New translations roadmap.md (Persian) --- web/pandas/fa/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 web/pandas/fa/about/roadmap.md diff --git a/web/pandas/fa/about/roadmap.md b/web/pandas/fa/about/roadmap.md new file mode 100644 index 000000000..bd52e545a --- /dev/null +++ b/web/pandas/fa/about/roadmap.md @@ -0,0 +1,197 @@ +# Roadmap + +This page provides an overview of the major themes in pandas' +development. Each of these items requires a relatively large amount of +effort to implement. These may be achieved more quickly with dedicated +funding or interest from contributors. + +An item being on the roadmap does not mean that it will _necessarily_ +happen, even with unlimited funding. During the implementation period we +may discover issues preventing the adoption of the feature. + +Additionally, an item _not_ being on the roadmap does not exclude it +from inclusion in pandas. The roadmap is intended for larger, +fundamental changes to the project that are likely to take months or +years of developer time. Smaller-scoped items will continue to be +tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues). + +The roadmap is defined as a set of major enhancement proposals named PDEPs. +For more information about PDEPs, and how to submit one, please refer to +[PEDP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html). + +## PDEPs + +{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %} + +

+<h3>{{ pdep_type.replace("_", " ").capitalize() }}</h3>
+
+<ul>
+{% for pdep in pdeps[pdep_type] %}
+<li><a href="{{ base_url }}pdeps/{{ pdep.file }}">{{ pdep.title }}</a></li>
+{% else %}
+<li>There are currently no PDEPs with this status</li>
+{% endfor %}
+</ul>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
ASV itself is a fabulous tool, +but requires some additional work to be integrated into an open source +project's workflow. + +The [asv-runner](https://github.com/asv-runner) organization, currently +made up of pandas maintainers, provides tools built on top of ASV. We +have a physical machine for running a number of project's benchmarks, +and tools managing the benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +- Be more stable. Currently, they're maintained on the nights and + weekends when a maintainer has free time. +- Tune the system for benchmarks to improve stability, following + +- Build a GitHub bot to request ASV runs _before_ a PR is merged. + Currently, the benchmarks are only run nightly. From b971b894332a4bd69fa6a5e4f08470613be3a418 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:51 -0500 Subject: [PATCH 066/184] New translations roadmap.md (Tamil) --- web/pandas/ta/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 web/pandas/ta/about/roadmap.md diff --git a/web/pandas/ta/about/roadmap.md b/web/pandas/ta/about/roadmap.md new file mode 100644 index 000000000..bd52e545a --- /dev/null +++ b/web/pandas/ta/about/roadmap.md @@ -0,0 +1,197 @@ +# Roadmap + +This page provides an overview of the major themes in pandas' +development. Each of these items requires a relatively large amount of +effort to implement. These may be achieved more quickly with dedicated +funding or interest from contributors. + +An item being on the roadmap does not mean that it will _necessarily_ +happen, even with unlimited funding. During the implementation period we +may discover issues preventing the adoption of the feature. + +Additionally, an item _not_ being on the roadmap does not exclude it +from inclusion in pandas. The roadmap is intended for larger, +fundamental changes to the project that are likely to take months or +years of developer time. Smaller-scoped items will continue to be +tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues). + +The roadmap is defined as a set of major enhancement proposals named PDEPs. +For more information about PDEPs, and how to submit one, please refer to +[PEDP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html). + +## PDEPs + +{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %} + +

+<h3>{{ pdep_type.replace("_", " ").capitalize() }}</h3>
+
+<ul>
+{% for pdep in pdeps[pdep_type] %}
+<li><a href="{{ base_url }}pdeps/{{ pdep.file }}">{{ pdep.title }}</a></li>
+{% else %}
+<li>There are currently no PDEPs with this status</li>
+{% endfor %}
+</ul>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
ASV itself is a fabulous tool, +but requires some additional work to be integrated into an open source +project's workflow. + +The [asv-runner](https://github.com/asv-runner) organization, currently +made up of pandas maintainers, provides tools built on top of ASV. We +have a physical machine for running a number of project's benchmarks, +and tools managing the benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +- Be more stable. Currently, they're maintained on the nights and + weekends when a maintainer has free time. +- Tune the system for benchmarks to improve stability, following + +- Build a GitHub bot to request ASV runs _before_ a PR is merged. + Currently, the benchmarks are only run nightly. From 372fc5d3080496dad34edce6a26e2a49161b9e16 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:52 -0500 Subject: [PATCH 067/184] New translations roadmap.md (Hindi) --- web/pandas/hi/about/roadmap.md | 197 +++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 web/pandas/hi/about/roadmap.md diff --git a/web/pandas/hi/about/roadmap.md b/web/pandas/hi/about/roadmap.md new file mode 100644 index 000000000..bd52e545a --- /dev/null +++ b/web/pandas/hi/about/roadmap.md @@ -0,0 +1,197 @@ +# Roadmap + +This page provides an overview of the major themes in pandas' +development. Each of these items requires a relatively large amount of +effort to implement. These may be achieved more quickly with dedicated +funding or interest from contributors. + +An item being on the roadmap does not mean that it will _necessarily_ +happen, even with unlimited funding. During the implementation period we +may discover issues preventing the adoption of the feature. + +Additionally, an item _not_ being on the roadmap does not exclude it +from inclusion in pandas. The roadmap is intended for larger, +fundamental changes to the project that are likely to take months or +years of developer time. Smaller-scoped items will continue to be +tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues). + +The roadmap is defined as a set of major enhancement proposals named PDEPs. +For more information about PDEPs, and how to submit one, please refer to +[PEDP-1]({{ base_url }}pdeps/0001-purpose-and-guidelines.html). + +## PDEPs + +{% for pdep_type in ["Under discussion", "Accepted", "Implemented", "Rejected"] %} + +

+<h3>{{ pdep_type.replace("_", " ").capitalize() }}</h3>
+
+<ul>
+{% for pdep in pdeps[pdep_type] %}
+<li><a href="{{ base_url }}pdeps/{{ pdep.file }}">{{ pdep.title }}</a></li>
+{% else %}
+<li>There are currently no PDEPs with this status</li>
+{% endfor %}
+</ul>
+ +{% endfor %} + +## Roadmap points pending a PDEP + + + +### Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +### String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +### Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +### Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. Label indexing must never involve looking in an axis twice for the same label(s). + This implies that any validation step must either: + +- limit validation to general features (e.g. dtype/structure of the key/index), or +- reuse the result for the actual indexing. + +2. Indexers must never rely on an explicit call to other indexers. 
+ For instance, it is OK to have some internal method of `.loc` call some + internal method of `__getitem__` (or of their common base class), + but never in the code flow of `.loc` should `the_obj[something]` appear. + +3. Execution of positional indexing must never involve labels (as currently, sadly, happens). + That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) + to `.iloc` should never involve the axes of the object in any way. + +4. Indexing must never involve accessing/modifying values (i.e., act on `._data` or `.values`) more than once. + The following steps must hence be clearly decoupled: + +- find positions we need to access/modify on each axis +- (if we are accessing) derive the type of object we need to return (dimensionality) +- actually access/modify the values +- (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, any code which deals on how data is stored + (including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) + must be independent from code that deals with identifying affected rows/columns, + and take place only once step 4.i is completed. + +- In particular, such code should most probably not live in `pandas/core/indexing.py` +- ... and must not depend in any way on the type(s) of axes (e.g. no `MultiIndex` special cases) + +6. As a corollary to point 1.i, `Index` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup, + on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. Use of trial and error should be limited, and anyway restricted to catch only exceptions + which are actually expected (typically `KeyError`). + +- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` + +8. Any code portion which is not specific to setters and getters must be shared, + and when small differences in behavior are expected (e.g. getting with `.loc` raises for + missing labels, setting still doesn't), they can be managed with a specific parameter. + +### Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled +code. + +### Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users different backgrounds (e.g. brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +### Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. 
+### Numba-accelerated operations
+
+[Numba](https://numba.pydata.org) is a JIT compiler for Python code.
+We'd like to provide ways for users to apply their own Numba-jitted
+functions where pandas accepts user-defined functions (for example,
+`Series.apply`, `DataFrame.apply`, `DataFrame.applymap`, and in groupby
+and window contexts). This will improve the performance of
+user-defined functions in these operations by staying within compiled
+code.
+
+### Documentation improvements
+
+We'd like to improve the content, structure, and presentation of the
+pandas documentation. Some specific goals include
+
+- Overhaul the HTML theme with a modern, responsive design
+  ([GH 15556](https://github.com/pandas-dev/pandas/issues/15556))
+- Improve the "Getting Started" documentation, designing and writing
+  learning paths for users from different backgrounds (e.g. brand new to
+  programming, familiar with other languages like R, already familiar
+  with Python).
+- Improve the overall organization of the documentation and specific
+  subsections of the documentation to make navigation and finding
+  content easier.
+
+### Performance monitoring
+
+Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/)
+to monitor for performance regressions; a minimal benchmark written in
+asv's style is sketched after this section. ASV itself is a fabulous
+tool, but requires some additional work to be integrated into an open
+source project's workflow.
+
+The [asv-runner](https://github.com/asv-runner) organization, currently
+made up of pandas maintainers, provides tools built on top of ASV. We
+have a physical machine for running a number of projects' benchmarks,
+and tools for managing the benchmark runs and reporting on results.
+
+We'd like to fund improvements and maintenance of these tools to
+
+- Be more stable. Currently, they're maintained on the nights and
+  weekends when a maintainer has free time.
+- Tune the system for benchmarks to improve stability, following the
+  tuning guidance in the asv documentation.
+- Build a GitHub bot to request ASV runs _before_ a PR is merged.
+  Currently, the benchmarks are only run nightly.
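For illustration, asv benchmarks are plain classes: `setup` prepares state, and any method whose name starts with `time_` is timed by `asv run`. Below is a minimal sketch in that style; the class, names, and sizes are hypothetical, not taken from the pandas suite.

```python
import numpy as np
import pandas as pd


class TimeGroupBySum:
    # asv calls setup() before timing each benchmark method.
    def setup(self):
        n = 100_000
        self.df = pd.DataFrame({
            "key": np.random.randint(0, 100, n),
            "value": np.random.randn(n),
        })

    # Methods named time_* are what `asv run` measures.
    def time_groupby_sum(self):
        self.df.groupby("key")["value"].sum()
```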
From fcbe0eae4b0353e5b30654af9a717ed014fe4157 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:07:53 -0500
Subject: [PATCH 068/184] New translations sponsors.md (French)

---
 web/pandas/fr/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 web/pandas/fr/about/sponsors.md

diff --git a/web/pandas/fr/about/sponsors.md b/web/pandas/fr/about/sponsors.md
new file mode 100644
index 000000000..4473a16cf
--- /dev/null
+++ b/web/pandas/fr/about/sponsors.md
@@ -0,0 +1,60 @@
+# Sponsors
+
+## NumFOCUS
+
+![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png)
+
+_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States.
+NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the
+health and sustainability of the project. Visit numfocus.org for more information.
+
+Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible
+to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation.
+
+## Become a sponsor
+
+As a free and open source project, _pandas_ relies on the support of the community of users for its development.
+If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There
+are different ways, such as employing people to work on pandas, funding the project, or becoming a
+[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at
+[admin@numfocus.org](mailto:admin@numfocus.org) to discuss.
+
+## Institutional partners
+
+Institutional partners are companies and universities that support the project by employing contributors.
+Current institutional partners include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From 4703668cabcc1dda115d894b292791f85cebdc03 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:55 -0500 Subject: [PATCH 069/184] New translations sponsors.md (Arabic) --- web/pandas/ar/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/ar/about/sponsors.md diff --git a/web/pandas/ar/about/sponsors.md b/web/pandas/ar/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/ar/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From 4523e1f3d562bc7775363e40530c6777e34fdd08 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:56 -0500 Subject: [PATCH 070/184] New translations sponsors.md (Catalan) --- web/pandas/ca/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/ca/about/sponsors.md diff --git a/web/pandas/ca/about/sponsors.md b/web/pandas/ca/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/ca/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From 576a9a823306143e0c5662eb95beec5660d242a5 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:57 -0500 Subject: [PATCH 071/184] New translations sponsors.md (Japanese) --- web/pandas/ja/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/ja/about/sponsors.md diff --git a/web/pandas/ja/about/sponsors.md b/web/pandas/ja/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/ja/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From bbc918c752e5b6e8009f74ee3e2f08b18c5d254c Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:07:58 -0500 Subject: [PATCH 072/184] New translations sponsors.md (Korean) --- web/pandas/ko/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/ko/about/sponsors.md diff --git a/web/pandas/ko/about/sponsors.md b/web/pandas/ko/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/ko/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From 6ef7bf678f0ba1437d76ece4a84b9f235009110f Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:00 -0500 Subject: [PATCH 073/184] New translations sponsors.md (Polish) --- web/pandas/pl/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/pl/about/sponsors.md diff --git a/web/pandas/pl/about/sponsors.md b/web/pandas/pl/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/pl/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From bb5b570c12a64ca2e3cd1aeb53abede0c4166423 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:01 -0500 Subject: [PATCH 074/184] New translations sponsors.md (Russian) --- web/pandas/ru/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/ru/about/sponsors.md diff --git a/web/pandas/ru/about/sponsors.md b/web/pandas/ru/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/ru/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From ccd061579c3476ce8fb6824d165eb3dcf2ee9f21 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:02 -0500 Subject: [PATCH 075/184] New translations sponsors.md (Chinese Simplified) --- web/pandas/zh/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/zh/about/sponsors.md diff --git a/web/pandas/zh/about/sponsors.md b/web/pandas/zh/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/zh/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From ff620ea45e395712c581e9091930265949d59d46 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:04 -0500 Subject: [PATCH 076/184] New translations sponsors.md (Persian) --- web/pandas/fa/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/fa/about/sponsors.md diff --git a/web/pandas/fa/about/sponsors.md b/web/pandas/fa/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/fa/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From b07381c23a0dcf8133f0de41bc6904a560627de0 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:05 -0500 Subject: [PATCH 077/184] New translations sponsors.md (Tamil) --- web/pandas/ta/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/ta/about/sponsors.md diff --git a/web/pandas/ta/about/sponsors.md b/web/pandas/ta/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/ta/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From fa178c1311ecd47f33cb7325f21becf142060145 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:06 -0500 Subject: [PATCH 078/184] New translations sponsors.md (Hindi) --- web/pandas/hi/about/sponsors.md | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 web/pandas/hi/about/sponsors.md diff --git a/web/pandas/hi/about/sponsors.md b/web/pandas/hi/about/sponsors.md new file mode 100644 index 000000000..4473a16cf --- /dev/null +++ b/web/pandas/hi/about/sponsors.md @@ -0,0 +1,60 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Become a sponsor + +As a free and open source project, _pandas_ relies on the support of the community of users for its development. +If you work for an organization that uses and benefits from _pandas_, please consider supporting pandas. There +are different ways, such as employing people to work on pandas, funding the project, or becoming a +[NumFOCUS sponsor](https://numfocus.org/sponsors) to support the broader ecosystem. Please contact us at +[admin@numfocus.org](mailto:admin@numfocus.org) to discuss. + +## Institutional partners + +Institutional partners are companies and universities that support the project by employing contributors. +Current institutional partners include: + +
+<ul>
+    {% for company in sponsors.active if company.kind == "partner" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Sponsors
+
+Sponsors are organizations that provide funding for pandas. Current sponsors include:
+
+<ul>
+    {% for company in sponsors.active if company.kind == "regular" %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## In-kind sponsors
+
+In-kind sponsors are organizations that support pandas development with goods or services.
+Current in-kind sponsors include:
+
+<ul>
+    {% for company in sponsors.inkind %}
+    <li>{{ company.name }}: {{ company.description }}</li>
+    {% endfor %}
+</ul>
+
+## Past institutional partners
+
+<ul>
+    {% for company in sponsors.past if company.kind == "partner" %}
+    <li>{{ company.name }}</li>
+    {% endfor %}
+</ul>
From 8f2130563f190999c079d099a90030756d47a58d Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:07 -0500 Subject: [PATCH 079/184] New translations team.md (French) --- web/pandas/fr/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/fr/about/team.md diff --git a/web/pandas/fr/about/team.md b/web/pandas/fr/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/fr/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From a3f64115491a9fea80516964f61d2852ded416ce Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:09 -0500 Subject: [PATCH 080/184] New translations team.md (Arabic) --- web/pandas/ar/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/ar/about/team.md diff --git a/web/pandas/ar/about/team.md b/web/pandas/ar/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/ar/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From 2c6d6f1b78fa9a2ca2675566dcce702a9806d423 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:10 -0500 Subject: [PATCH 081/184] New translations team.md (Catalan) --- web/pandas/ca/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/ca/about/team.md diff --git a/web/pandas/ca/about/team.md b/web/pandas/ca/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/ca/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From 84c0eaed036a4d64050965c34c73e87d67c2ef43 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:11 -0500 Subject: [PATCH 082/184] New translations team.md (Japanese) --- web/pandas/ja/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/ja/about/team.md diff --git a/web/pandas/ja/about/team.md b/web/pandas/ja/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/ja/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From dc827333c2268b0db1ce9721d5622364a265b6f6 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:12 -0500 Subject: [PATCH 083/184] New translations team.md (Korean) --- web/pandas/ko/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/ko/about/team.md diff --git a/web/pandas/ko/about/team.md b/web/pandas/ko/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/ko/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From db35a0f42ba0a4ca41d16d8607f38ddb995ba91e Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:13 -0500 Subject: [PATCH 084/184] New translations team.md (Polish) --- web/pandas/pl/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/pl/about/team.md diff --git a/web/pandas/pl/about/team.md b/web/pandas/pl/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/pl/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From 155d02cb35efeadd1d9098733dfec2e00d5e9f21 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:14 -0500 Subject: [PATCH 085/184] New translations team.md (Russian) --- web/pandas/ru/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/ru/about/team.md diff --git a/web/pandas/ru/about/team.md b/web/pandas/ru/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/ru/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From 3e440207bd03b448302a60a7e480ee39a8787610 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:15 -0500 Subject: [PATCH 086/184] New translations team.md (Chinese Simplified) --- web/pandas/zh/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/zh/about/team.md diff --git a/web/pandas/zh/about/team.md b/web/pandas/zh/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/zh/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From a6f537731c56b00599b75d4b7a154a7442ff2269 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:17 -0500 Subject: [PATCH 087/184] New translations team.md (Persian) --- web/pandas/fa/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/fa/about/team.md diff --git a/web/pandas/fa/about/team.md b/web/pandas/fa/about/team.md new file mode 100644 index 000000000..ec26b761c --- /dev/null +++ b/web/pandas/fa/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## دستور کار (حاکمیت) + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From 369d681a6adccccd8442e1fb39e55bc3bd5914eb Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:18 -0500 Subject: [PATCH 088/184] New translations team.md (Tamil) --- web/pandas/ta/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/ta/about/team.md diff --git a/web/pandas/ta/about/team.md b/web/pandas/ta/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/ta/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From c03e2d8a49d89b5cf0287b78aa5158c8343c22e8 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:19 -0500 Subject: [PATCH 089/184] New translations team.md (Hindi) --- web/pandas/hi/about/team.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 web/pandas/hi/about/team.md diff --git a/web/pandas/hi/about/team.md b/web/pandas/hi/about/team.md new file mode 100644 index 000000000..7a19fd7af --- /dev/null +++ b/web/pandas/hi/about/team.md @@ -0,0 +1,85 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [2,000 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page]({{ base_url }}donate.html). + +## Active maintainers + +
+<div class="card-group maintainers">
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
+    <div class="card">
+        <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
+        <div class="card-body">
+            <h6 class="card-title">
+                {% if person.blog %}
+                <a href="{{ person.blog }}">
+                    {{ person.name or person.login }}
+                </a>
+                {% else %}
+                {{ person.name or person.login }}
+                {% endif %}
+            </h6>
+            <p class="card-text small"><i class="fab fa-github"></i> {{ person.login }}</p>
+        </div>
+    </div>
+    {% endfor %}
+</div>
+ +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({{ base_url }}community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +The project governance is available in the [project governance page]({{ base_url }}about/governance.html). + +## Workgroups + +{% for k, workgroup in workgroups.items() %} + +### {{ workgroup.name }} + +
+<ul>
+    <li>Contact:
+        <a href="mailto:{{ workgroup.contact }}">{{ workgroup.contact }}</a>
+    </li>
+    <li>Responsibilities: {{ workgroup.responsibilities }}</li>
+    <li>Members:
+        <ul>
+            {% for person in workgroup.members %}
+            <li>{{ person }}{% if loop.first %} (lead){% endif %}</li>
+            {% endfor %}
+        </ul>
+    </li>
+</ul>
+ +{% endfor %} + +## Inactive maintainers + + From 60301ebd6003c04749c65857e6d7d26abcc265a5 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:21 -0500 Subject: [PATCH 090/184] New translations benchmarks.md (French) --- web/pandas/fr/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/fr/community/benchmarks.md diff --git a/web/pandas/fr/community/benchmarks.md b/web/pandas/fr/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/fr/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From 6161fa24d05ab14f5783248e384883f2c166de7d Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:22 -0500 Subject: [PATCH 091/184] New translations benchmarks.md (Arabic) --- web/pandas/ar/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/ar/community/benchmarks.md diff --git a/web/pandas/ar/community/benchmarks.md b/web/pandas/ar/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/ar/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. 
There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From f598b459ef9ba7fae1f2fcba4299f7473042eccc Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:23 -0500 Subject: [PATCH 092/184] New translations benchmarks.md (Catalan) --- web/pandas/ca/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/ca/community/benchmarks.md diff --git a/web/pandas/ca/community/benchmarks.md b/web/pandas/ca/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/ca/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). 
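
The before/after workflow described above is commonly driven with asv's `continuous` command, which benchmarks two revisions and reports regressions. A sketch of scripting it from a pandas checkout (the `^groupby` filter and the `-f 1.1` threshold are example values taken from common usage; asv must be installed):

```python
# Sketch: compare benchmark results between upstream/main and the current
# branch, as described above. Run from a pandas checkout with asv installed;
# "-b ^groupby" restricts the run to the groupby benchmarks as an example.
import subprocess

subprocess.run(
    ["asv", "continuous", "-f", "1.1", "upstream/main", "HEAD", "-b", "^groupby"],
    cwd="asv_bench",
    check=True,
)
```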
+ +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From c207110cac2ad860e6d024faff5ad34f17096975 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:25 -0500 Subject: [PATCH 093/184] New translations benchmarks.md (Japanese) --- web/pandas/ja/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/ja/community/benchmarks.md diff --git a/web/pandas/ja/community/benchmarks.md b/web/pandas/ja/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/ja/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. 
The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From 1efa42175528d15642de0916e54073df70875e06 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:26 -0500 Subject: [PATCH 094/184] New translations benchmarks.md (Korean) --- web/pandas/ko/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/ko/community/benchmarks.md diff --git a/web/pandas/ko/community/benchmarks.md b/web/pandas/ko/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/ko/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From 1921e8b33c06e1836fcab849123db2050e97e76c Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:27 -0500 Subject: [PATCH 095/184] New translations benchmarks.md (Polish) --- web/pandas/pl/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/pl/community/benchmarks.md diff --git a/web/pandas/pl/community/benchmarks.md b/web/pandas/pl/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/pl/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. 
There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From ea00c80da19f79ef98be9feb0485fb560253aa19 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:28 -0500 Subject: [PATCH 096/184] New translations benchmarks.md (Russian) --- web/pandas/ru/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/ru/community/benchmarks.md diff --git a/web/pandas/ru/community/benchmarks.md b/web/pandas/ru/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/ru/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). 
+ +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From a24ab85461ff9fed6c547aaa1fc87cad4bb50b6c Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:29 -0500 Subject: [PATCH 097/184] New translations benchmarks.md (Chinese Simplified) --- web/pandas/zh/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/zh/community/benchmarks.md diff --git a/web/pandas/zh/community/benchmarks.md b/web/pandas/zh/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/zh/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. 
The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From 093488d685be72d3bac967fa558573e39c7eda44 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:30 -0500 Subject: [PATCH 098/184] New translations benchmarks.md (Persian) --- web/pandas/fa/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/fa/community/benchmarks.md diff --git a/web/pandas/fa/community/benchmarks.md b/web/pandas/fa/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/fa/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From ef031aff13020332528fb8bcfa7b8e58bec0c710 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:32 -0500 Subject: [PATCH 099/184] New translations benchmarks.md (Tamil) --- web/pandas/ta/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/ta/community/benchmarks.md diff --git a/web/pandas/ta/community/benchmarks.md b/web/pandas/ta/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/ta/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. 
There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). + +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From a8d91e8cff35cccf354af20e72c3514e5bbc58e5 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:33 -0500 Subject: [PATCH 100/184] New translations benchmarks.md (Hindi) --- web/pandas/hi/community/benchmarks.md | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 web/pandas/hi/community/benchmarks.md diff --git a/web/pandas/hi/community/benchmarks.md b/web/pandas/hi/community/benchmarks.md new file mode 100644 index 000000000..5d05916fb --- /dev/null +++ b/web/pandas/hi/community/benchmarks.md @@ -0,0 +1,40 @@ +# Benchmarks + +Benchmarks are tests to measure the performance of pandas. There are two different +kinds of benchmarks relevant to pandas: + +- Internal pandas benchmarks to measure speed and memory usage over time +- Community benchmarks comparing the speed or memory usage of different tools at + doing the same job + +## pandas benchmarks + +pandas benchmarks are implemented in the [asv_bench](https://github.com/pandas-dev/pandas/tree/main/asv_bench) +directory of our repository. The benchmarks are implemented for the +[airspeed velocity](https://asv.readthedocs.io/en/latest/) (asv for short) framework. + +The benchmarks can be run locally by any pandas developer. This can be done +with the `asv run` command, and it can be useful to detect if local changes have +an impact in performance, by running the benchmarks before and after the changes. +More information on running the performance test suite is found +[here](https://pandas.pydata.org/docs/dev/development/contributing_codebase.html#running-the-performance-test-suite). 
+ +Note that benchmarks are not deterministic, and running in different hardware or +running in the same hardware with different levels of stress have a big impact in +the result. Even running the benchmarks with identical hardware and almost identical +conditions can produce significant differences when running the same exact code. + +## Automated benchmark runner + +The [asv-runner](https://github.com/pandas-dev/asv-runner/) repository automatically runs the pandas asv benchmark suite +for every (or almost every) commit to the `main` branch. It is run on GitHub actions. +See the linked repository for more details. The results are available at: + +https://pandas-dev.github.io/asv-runner/ + +## Community benchmarks + +The main benchmarks comparing dataframe tools that include pandas are: + +- [DuckDB (former H2O.ai) benchmarks](https://duckdblabs.github.io/db-benchmark/) +- [TPCH benchmarks](https://pola.rs/posts/benchmarks/) From 86915bad958432e8216fbc8ddb44697bebec5d32 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:34 -0500 Subject: [PATCH 101/184] New translations coc.md (French) --- web/pandas/fr/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/fr/community/coc.md diff --git a/web/pandas/fr/community/coc.md b/web/pandas/fr/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/fr/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. 
+Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From d338c383dc24d7827bc6e00bccd38ea8b7b33976 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:36 -0500 Subject: [PATCH 102/184] New translations coc.md (Arabic) --- web/pandas/ar/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/ar/community/coc.md diff --git a/web/pandas/ar/community/coc.md b/web/pandas/ar/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/ar/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 5d8e8845872e2a3850e28437c32761043d63e930 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:37 -0500 Subject: [PATCH 103/184] New translations coc.md (Catalan) --- web/pandas/ca/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/ca/community/coc.md diff --git a/web/pandas/ca/community/coc.md b/web/pandas/ca/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/ca/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 7d76a37d50cc01c08e69bbcd5de1654a3e93025e Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:38 -0500 Subject: [PATCH 104/184] New translations coc.md (Japanese) --- web/pandas/ja/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/ja/community/coc.md diff --git a/web/pandas/ja/community/coc.md b/web/pandas/ja/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/ja/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 39e11237d0921c8ccb6e52901e676907b29035ad Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:39 -0500 Subject: [PATCH 105/184] New translations coc.md (Korean) --- web/pandas/ko/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/ko/community/coc.md diff --git a/web/pandas/ko/community/coc.md b/web/pandas/ko/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/ko/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 14e9e1d3db122557ae1da0dd1de8646dad16fd70 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:41 -0500 Subject: [PATCH 106/184] New translations coc.md (Polish) --- web/pandas/pl/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/pl/community/coc.md diff --git a/web/pandas/pl/community/coc.md b/web/pandas/pl/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/pl/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 48680124390b952377ca5975b8721dc581ea62b5 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:42 -0500 Subject: [PATCH 107/184] New translations coc.md (Russian) --- web/pandas/ru/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/ru/community/coc.md diff --git a/web/pandas/ru/community/coc.md b/web/pandas/ru/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/ru/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 385d9f91d98108c9a0354ced12b72921f2330e17 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:43 -0500 Subject: [PATCH 108/184] New translations coc.md (Chinese Simplified) --- web/pandas/zh/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/zh/community/coc.md diff --git a/web/pandas/zh/community/coc.md b/web/pandas/zh/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/zh/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+<ul>
+  {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+  {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 53e660f71d64075935722f35d4a5e5ebf2ad01ec Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:45 -0500 Subject: [PATCH 109/184] New translations coc.md (Persian) --- web/pandas/fa/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/fa/community/coc.md diff --git a/web/pandas/fa/community/coc.md b/web/pandas/fa/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/fa/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+    {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+    {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 644fc58ef2a5b26533a5caac4628b9171f1ba761 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:46 -0500 Subject: [PATCH 110/184] New translations coc.md (Tamil) --- web/pandas/ta/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/ta/community/coc.md diff --git a/web/pandas/ta/community/coc.md b/web/pandas/ta/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/ta/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+    {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+    {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 7f14590b47719b60436f9042f98510b3eab1b261 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:47 -0500 Subject: [PATCH 111/184] New translations coc.md (Hindi) --- web/pandas/hi/community/coc.md | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 web/pandas/hi/community/coc.md diff --git a/web/pandas/hi/community/coc.md b/web/pandas/hi/community/coc.md new file mode 100644 index 000000000..c26fc5d55 --- /dev/null +++ b/web/pandas/hi/community/coc.md @@ -0,0 +1,65 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery +- Personal attacks +- Trolling or insulting/derogatory comments +- Public or private harassment +- Publishing other's private information, such as physical or electronic + addresses, without explicit permission +- Other unethical or unprofessional conduct + +Furthermore, we encourage inclusive behavior - for example, +please don't say “hey guys!” but “hey everyone!”. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
+    {% for person in maintainers.coc %}
+  <li>{{ person }}</li>
+    {% endfor %}
+</ul>
+ +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[https://www.contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: https://www.contributor-covenant.org +[version]: https://www.contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct From 9f0097bee201a4cb1421325203a7bffb17c515d7 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:49 -0500 Subject: [PATCH 112/184] New translations ecosystem.md (French) --- web/pandas/fr/community/ecosystem.md | 738 +++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 web/pandas/fr/community/ecosystem.md diff --git a/web/pandas/fr/community/ecosystem.md b/web/pandas/fr/community/ecosystem.md new file mode 100644 index 000000000..61ca1a02d --- /dev/null +++ b/web/pandas/fr/community/ecosystem.md @@ -0,0 +1,738 @@ +# Écosystème + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. The creation +of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a community-maintained list of projects that build on pandas in order +to provide tools in the PyData space. The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library. + +For a more complete list of projects that depend on pandas, see the libraries.io usage page for +pandas or search pypi for +pandas. + +We'd like to make it easier for users to find these projects, if you +know of other substantial projects that you feel should be on this list, +please let us know. + +## Statistics and machine learning + +### [Statsmodels](https://www.statsmodels.org/) + +Statsmodels is the prominent Python "statistics and econometrics +library" and it has a long-standing special relationship with pandas. +Statsmodels provides powerful statistics, econometrics, analysis and +modeling functionality that is out of pandas' scope. Statsmodels +leverages pandas objects as the underlying data container for +computation. + +### [skrub](https://skrub-data.org) + +Skrub facilitates machine learning on dataframes. It bridges pandas +to scikit-learn and related. In particular it facilitates building +features from dataframes. + +### [Featuretools](https://github.com/alteryx/featuretools/) + +Featuretools is a Python library for automated feature engineering built +on top of pandas. It excels at transforming temporal and relational +datasets into feature matrices for machine learning using reusable +feature engineering "primitives". Users can contribute their own +primitives in Python and share them with the rest of the community. + +### [Compose](https://github.com/alteryx/compose) + +Compose is a machine learning tool for labeling data and prediction engineering. 
+It allows you to structure the labeling process by parameterizing +prediction problems and transforming time-driven relational data into +target values with cutoff times that can be used for supervised learning. + +### [STUMPY](https://github.com/TDAmeritrade/stumpy) + +STUMPY is a powerful and scalable Python library for modern time series analysis. +At its core, STUMPY efficiently computes something called a +[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html), +which can be used for a wide variety of time series data mining tasks. + +## Visualization + +### [Altair](https://altair-viz.github.io/) + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with Pandas DataFrames. + +### [Bokeh](https://docs.bokeh.org) + +Bokeh is a Python interactive visualization library for large datasets +that natively uses the latest web technologies. Its goal is to provide +elegant, concise construction of novel graphics in the style of +Protovis/D3, while delivering high-performance interactivity over large +data to thin clients. + +[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a +high level API for Bokeh that can be loaded as a native Pandas plotting +backend via + +``` +pd.set_option("plotting.backend", "pandas_bokeh") +``` + +It is very similar to the matplotlib plotting backend, but provides +interactive web-based charts and maps. + +### [pygwalker](https://github.com/Kanaries/pygwalker) + +PyGWalker is an interactive data visualization and +exploratory data analysis tool built upon Graphic Walker +with support for visualization, cleaning, and annotation workflows. + +pygwalker can save interactively created charts +to Graphic-Walker and Vega-Lite JSON. + +``` +import pygwalker as pyg +pyg.walk(df) +``` + +### [seaborn](https://seaborn.pydata.org) + +Seaborn is a Python visualization library based on +[matplotlib](https://matplotlib.org). It provides a high-level, +dataset-oriented interface for creating attractive statistical graphics. +The plotting functions in seaborn understand pandas objects and leverage +pandas grouping operations internally to support concise specification +of complex visualizations. Seaborn also goes beyond matplotlib and +pandas with the option to perform statistical estimation while plotting, +aggregating across observations and visualizing the fit of statistical +models to emphasize patterns in a dataset. + +``` +import seaborn as sns +sns.set_theme() +``` + +### [plotnine](https://github.com/has2k1/plotnine/) + +Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a +foundational exploratory visualization package for the R language. Based +on "The Grammar of +Graphics" +it provides a powerful, declarative and extremely general way to +generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/). + +### [IPython Vega](https://github.com/vega/ipyvega) + +[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook. 
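A minimal sketch of this notebook workflow, assuming the `vega` package (ipyvega) exposes a `VegaLite` helper; the spec and data below are illustrative:

```python
import pandas as pd
from vega import VegaLite  # the PyPI package for ipyvega is named "vega"

df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 1, 6]})

# Display a Vega-Lite scatter plot of the DataFrame in the notebook output
VegaLite(
    {
        "mark": "point",
        "encoding": {
            "x": {"field": "x", "type": "quantitative"},
            "y": {"field": "y", "type": "quantitative"},
        },
    },
    df,
)
```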
+ +### [Plotly](https://plot.ly/python) + +[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/) +enables interactive figures and web shareability. Maps, 2D, 3D, and +live-streaming graphs are rendered with WebGL and +[D3.js](https://d3js.org/). The library supports plotting directly from +a pandas DataFrame and cloud-based collaboration. Users of matplotlib, +ggplot for Python, and +Seaborn can +convert figures into interactive web-based plots. Plots can be drawn in +[IPython Notebooks](https://plot.ly/ipython-notebooks/) , edited with R +or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly +is free for unlimited sharing, and has cloud, offline, or on-premise +accounts for private use. + +### [Lux](https://github.com/lux-org/lux) + +Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas: + +```python +import lux +import pandas as pd + +df = pd.read_csv("data.csv") +df # discover interesting insights! +``` + +By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allow users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. + +### [D-Tale](https://github.com/man-group/dtale) + +D-Tale is a lightweight web client for visualizing pandas data structures. It +provides a rich spreadsheet-style grid which acts as a wrapper for a lot of +pandas functionality (query, sort, describe, corr...) so users can quickly +manipulate their data. There is also an interactive chart-builder using Plotly +Dash allowing users to build nice portable visualizations. D-Tale can be +invoked with the following command + +```python +import dtale + +dtale.show(df) +``` + +D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle +& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1). + +### [hvplot](https://hvplot.holoviz.org/index.html) + +hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/). +It can be loaded as a native pandas plotting backend via + +```python +pd.set_option("plotting.backend", "hvplot") +``` + +## IDE + +### [IPython](https://ipython.org/documentation.html) + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with Pandas methods and also +attributes like DataFrame columns. + +### [Jupyter Notebook / Jupyter Lab](https://jupyter.org) + +Jupyter Notebook is a web application for creating Jupyter notebooks. A +Jupyter notebook is a JSON document containing an ordered list of +input/output cells which can contain code, text, mathematics, plots and +rich media. Jupyter notebooks can be converted to a number of open +standard output formats (HTML, HTML presentation slides, LaTeX, PDF, +ReStructuredText, Markdown, Python) through 'Download As' in the web +interface and `jupyter convert` in a shell. 
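As a concrete example of the shell conversion step (the subcommand is `nbconvert`):

```console
jupyter nbconvert --to html notebook.ipynb
jupyter nbconvert --to markdown notebook.ipynb
```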
+ +Pandas DataFrames implement `_repr_html_` and `_repr_latex` methods which +are utilized by Jupyter Notebook for displaying (abbreviated) HTML or +LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may +or may not be compatible with non-HTML Jupyter output formats.) + +See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html) +for pandas `display.` settings. + +### [Spyder](https://www.spyder-ide.org/) + +Spyder is a cross-platform PyQt-based IDE combining the editing, +analysis, debugging and profiling functionality of a software +development tool with the data exploration, interactive execution, deep +inspection and rich visualization capabilities of a scientific +environment like MATLAB or Rstudio. + +Its Variable +Explorer allows +users to view, manipulate and edit pandas `Index`, `Series`, and +`DataFrame` objects like a "spreadsheet", including copying and +modifying values, sorting, displaying a "heatmap", converting data +types and more. Pandas objects can also be renamed, duplicated, new +columns added, copied/pasted to/from the clipboard (as TSV), and +saved/loaded to/from a file. Spyder can also import data from a variety +of plain text and binary files or the clipboard into a new pandas +DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython +Console, and Spyder's +[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and +render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. + +### [marimo](https://marimo.io) + +marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun: + +1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities. +2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes. +3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook. +4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns. +5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively. +6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory. + +## API + +### [pandas-datareader](https://github.com/pydata/pandas-datareader) + +`pandas-datareader` is a remote data access library for pandas +(PyPI:`pandas-datareader`). It is based on functionality that was +located in `pandas.io.data` and `pandas.io.wb` but was split off in +v0.19. 
See more in the pandas-datareader +docs: + +The following data feeds are available: + +- Google Finance +- Tiingo +- Morningstar +- IEX +- Robinhood +- Enigma +- Quandl +- FRED +- Fama/French +- World Bank +- OECD +- Eurostat +- TSP Fund Data +- Nasdaq Trader Symbol Definitions +- Stooq Index Data +- MOEX Data + +### [pandaSDMX](https://pandasdmx.readthedocs.io) + +pandaSDMX is a library to retrieve and acquire statistical data and +metadata disseminated in [SDMX](https://sdmx.org) 2.1, an +ISO-standard widely used by institutions such as statistics offices, +central banks, and international organisations. pandaSDMX can expose +datasets and related structural metadata including data flows, +code-lists, and data structure definitions as pandas Series or +MultiIndexed DataFrames. + +### [fredapi](https://github.com/mortada/fredapi) + +fredapi is a Python interface to the Federal Reserve Economic Data +(FRED) provided by the Federal Reserve +Bank of St. Louis. It works with both the FRED database and ALFRED +database that contains point-in-time data (i.e. historic data +revisions). fredapi provides a wrapper in Python to the FRED HTTP API, +and also provides several convenient methods for parsing and analyzing +point-in-time data from ALFRED. fredapi makes use of pandas and returns +data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. + +## Domain specific + +### [Geopandas](https://github.com/geopandas/geopandas) + +Geopandas extends pandas data objects to include geographic information +which support geometric operations. If your work entails maps and +geographical coordinates, and you love pandas, you should take a close +look at Geopandas. + +### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) + +gurobipy-pandas provides a convenient accessor API to connect pandas with +gurobipy. It enables users to more easily and efficiently build mathematical +optimization models from data stored in DataFrames and Series, and to read +solutions back directly as pandas objects. + +### [staircase](https://github.com/staircase-dev/staircase) + +staircase is a data analysis package, built upon pandas and numpy, for modelling and +manipulation of mathematical step functions. It provides a rich variety of arithmetic +operations, relational operations, logical operations, statistical operations and +aggregations for step functions defined over real numbers, datetime and timedelta domains. + +### [xarray](https://github.com/pydata/xarray) + +xarray brings the labeled data power of pandas to the physical sciences +by providing N-dimensional variants of the core pandas data structures. +It aims to provide a pandas-like and pandas-compatible toolkit for +analytics on multi-dimensional arrays, rather than the tabular data for +which pandas excels. + +## IO + +### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas) + +NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly. + +It supports the following data types: + +- pandas data types +- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm) +- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/) + +The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema). 
+ +Example: + +```python +import ntv_pandas as npd + +jsn = df.npd.to_json(table=False) # save df as a JSON-value (format Table Schema if table is True else format NTV ) +df = npd.read_json(jsn) # load a JSON-value as a `DataFrame` + +df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not +``` + +### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas) + +BCPandas provides high performance writes from pandas to Microsoft SQL Server, +far exceeding the performance of the native `df.to_sql` method. Internally, it uses +Microsoft's BCP utility, but the complexity is fully abstracted away from the end user. +Rigorously tested, it is a complete replacement for `df.to_sql`. + +### [Deltalake](https://pypi.org/project/deltalake) + +Deltalake python package lets you access tables stored in +[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or +JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert +any Delta table into Pandas dataframe. + +### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas) + +pandas-gbq provides high performance reads and writes to and from +[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0), +these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`. +Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq`, instead. + +### [ArcticDB](https://github.com/man-group/ArcticDB) + +ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/). + +#### ArcticDB Terminology + +ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components: + +- `Object Store` Collections of libraries. Used to separate logical environments from each other. Analogous to a database server. +- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database. +- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables. +- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object. + +#### Installation + +To install, simply run: + +```console +pip install arcticdb +``` + +To get started, we can import ArcticDB and instantiate it: + +```python +import arcticdb as adb +import numpy as np +import pandas as pd +# this will set up the storage using the local file system +arctic = adb.Arctic("lmdb://arcticdb_test") +``` + +> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\ +> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`. + +#### Library Setup + +ArcticDB is geared towards storing many (potentially millions) of tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. 
Libraries must first be initialized prior to use: + +```python +lib = arctic.get_library('sample', create_if_missing=True) +``` + +#### Writing Data to ArcticDB + +Now we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage. + +```python +df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3) + } +) + +df +df.dtypes +``` + +Write to ArcticDB. + +```python +write_record = lib.write("test", df) +``` + +> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types: +> +> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index) +> - `RangeIndex` +> - `DatetimeIndex` +> - `MultiIndex` composed of above supported types +> +> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc'). + +#### Reading Data from ArcticDB + +Read the data back from storage: + +```python +read_record = lib.read("test") +read_record.data +df.dtypes +``` + +ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder). + +### [Hugging Face](https://huggingface.co/datasets) + +The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library. + +You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`. + +For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb): + +```python +import pandas as pd + +# Load the IMDB dataset +df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet") +``` + +Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas. + +To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`: + +```python +# Save the dataset to my Hugging Face account +df.to_parquet("hf://datasets/username/dataset_name/train.parquet") +``` + +You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets). + +## Out-of-core + +### [Bodo](https://github.com/bodo-ai/Bodo) + +Bodo is a high-performance compute engine for Python data processing. +Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas +workloads from laptops to clusters without major code changes. +Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology—making it +both easier to use and often much faster than alternatives. +Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently. 
+ +```python +import pandas as pd +import bodo + +@bodo.jit +def process_data(): + df = pd.read_parquet("my_data.pq") + df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)}) + df2.to_parquet("out.pq") + +process_data() +``` + +### [Cylon](https://cylondata.org/) + +Cylon is a fast, scalable, distributed memory parallel runtime with a pandas +like Python DataFrame API. ”Core Cylon” is implemented with C++ using Apache +Arrow format to represent the data in-memory. Cylon DataFrame API implements +most of the core operators of pandas such as merge, filter, join, concat, +group-by, drop_duplicates, etc. These operators are designed to work across +thousands of cores to scale applications. It can interoperate with pandas +DataFrame by reading data from pandas or converting data to pandas so users +can selectively scale parts of their pandas DataFrame applications. + +```python +from pycylon import read_csv, DataFrame, CylonEnv +from pycylon.net import MPIConfig + +# Initialize Cylon distributed environment +config: MPIConfig = MPIConfig() +env: CylonEnv = CylonEnv(config=config, distributed=True) + +df1: DataFrame = read_csv('/tmp/csv1.csv') +df2: DataFrame = read_csv('/tmp/csv2.csv') + +# Using 1000s of cores across the cluster to compute the join +df3: Table = df1.join(other=df2, on=[0], algorithm="hash", env=env) + +print(df3) +``` + +### [Dask](https://docs.dask.org) + +Dask is a flexible parallel computing library for analytics. Dask +provides a familiar `DataFrame` interface for out-of-core, parallel and +distributed computing. + +### [Dask-ML](https://ml.dask.org) + +Dask-ML enables parallel and distributed machine learning using Dask +alongside existing machine learning libraries like Scikit-Learn, +XGBoost, and TensorFlow. + +### [Ibis](https://ibis-project.org/docs/) + +Ibis offers a standard way to write analytics code, that can be run in multiple engines. It helps in bridging the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.). + +### [Koalas](https://koalas.readthedocs.io/en/latest/) + +Koalas provides a familiar pandas DataFrame interface on top of Apache +Spark. It enables users to leverage multi-cores on one machine or a +cluster of machines to speed up or scale their DataFrame code. + +### [Modin](https://github.com/modin-project/modin) + +The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement +for pandas. This means that you can use Modin with existing pandas code or write +new code with the existing pandas API. Modin can leverage your entire machine or +cluster to speed up and scale your pandas workloads, including traditionally +time-consuming tasks like ingesting data (`read_csv`, `read_excel`, +`read_parquet`, etc.). + +```python +# import pandas as pd +import modin.pandas as pd + +df = pd.read_csv("big.csv") # use all your cores! +``` + +### [Pandarallel](https://github.com/nalepae/pandarallel) + +Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code. +It also displays progress bars. + +```python +from pandarallel import pandarallel + +pandarallel.initialize(progress_bar=True) + +# df.apply(func) +df.parallel_apply(func) +``` + +### [Vaex](https://vaex.io/docs/) + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. 
Vaex is +a python library for Out-of-Core DataFrames (similar to Pandas), to +visualize and explore big tabular datasets. It can calculate statistics +such as mean, sum, count, standard deviation etc, on an N-dimensional +grid up to a billion (10^9) objects/rows per second. Visualization is +done using histograms, density plots and 3d volume rendering, allowing +interactive exploration of big data. Vaex uses memory mapping, zero +memory copy policy and lazy computations for best performance (no memory +wasted). + +- `vaex.from_pandas` +- `vaex.to_pandas_df` + +### [Hail Query](https://hail.is/) + +An out-of-core, preemptible-safe, distributed, dataframe library serving +the genetics community. Hail Query ships with on-disk data formats, +in-memory data formats, an expression compiler, a query planner, and a +distributed sort algorithm all designed to accelerate queries on large +matrices of genome sequencing data. + +It is often easiest to use pandas to manipulate the summary statistics or +other small aggregates produced by Hail. For this reason, Hail provides +native import to and export from pandas DataFrames: + +- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas) +- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas) + +## Data cleaning and validation + +### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor) + +Pyjanitor provides a clean API for cleaning data, using method chaining. + +### [Pandera](https://pandera.readthedocs.io/en/stable/) + +Pandera provides a flexible and expressive API for performing data validation on dataframes +to make data processing pipelines more readable and robust. +Dataframes contain information that pandera explicitly validates at runtime. This is useful in +production-critical data pipelines or reproducible research settings. + +## Extension data types + +Pandas provides an interface for defining +[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system. +The following libraries implement that interface to provide types not found in NumPy or pandas, +which work well with pandas' data containers. + +### [awkward-pandas](https://github.com/scikit-hep/awkward) + +Awkward-pandas provides an extension type for storing Awkward +Arrays inside pandas' Series and +DataFrame. It also provides an accessor for using awkward functions +on Series that are of awkward type. + +### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas) + +db-dtypes provides an extension types for working with types like +DATE, TIME, and JSON from database systems. This package is used +by pandas-gbq to provide natural dtypes for BigQuery data types without +a natural numpy type. + +### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/) + +Pandas-Genomics provides an extension type and extension array for working +with genomics data. It also includes `genomics` accessors for many useful properties +and methods related to QC and analysis of genomics data. + +### [Physipandas](https://github.com/mocquin/physipandas) + +Physipandas provides an extension for manipulating physical quantities +(like scalar and numpy.ndarray) in association with a physical unit +(like meter or joule) and additional features for integration of +`physipy` accessors with pandas Series and Dataframe. + +### [Pint-Pandas](https://github.com/hgrecco/pint-pandas) + +Pint-Pandas provides an extension type for storing numeric arrays with units. 
+These arrays can be stored inside pandas' Series and DataFrame. Operations +between Series and DataFrame columns which use pint's extension array are then +units aware. + +### [Text Extensions](https://ibm.biz/text-extensions-for-pandas) + +Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames. + +## Accessors + +A directory of projects providing +[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). +This is for users to discover new accessors and for library +authors to coordinate on the namespace. + +| Library | Accessor | Classes | +| -------------------------------------------------------------------- | ---------- | --------------------- | +| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` | +| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | +| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` | +| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | +| [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` | +| [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` | +| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` | +| [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` | +| [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` | + +## Development tools + +### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs) + +While pandas repository is partially typed, the package itself doesn't expose this information for external use. +Install pandas-stubs to enable basic type coverage of pandas API. + +Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468), +[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142). + +See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs). + +### [Hamilton](https://github.com/dagworks-inc/hamilton) + +Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a +Pandas code base, specifically with respect to feature engineering for machine learning models. + +It prescribes an opinionated paradigm, that ensures all code is: + +- unit testable +- integration testing friendly +- documentation friendly +- transformation logic is reusable, as it is decoupled from the context of where it is used. +- integratable with runtime data quality checks. + +This helps one to scale your pandas code base, at the same time, keeping maintenance costs low. + +For more information, see [documentation](https://hamilton.readthedocs.io/). 
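As background for the accessor table above, here is a minimal sketch of how a project registers a custom accessor through pandas' public extension API; the `demo` namespace and the column names are hypothetical:

```python
import pandas as pd


@pd.api.extensions.register_dataframe_accessor("demo")  # hypothetical namespace
class DemoAccessor:
    def __init__(self, pandas_obj: pd.DataFrame):
        self._obj = pandas_obj

    @property
    def center(self):
        # Illustrative: midpoint of assumed "lat"/"lon" columns
        return (self._obj["lat"].mean(), self._obj["lon"].mean())


df = pd.DataFrame({"lat": [10.0, 20.0], "lon": [30.0, 40.0]})
print(df.demo.center)  # (15.0, 35.0)
```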
From 3953b02598f3d2c52049a19347f9dff896038913 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:52 -0500 Subject: [PATCH 113/184] New translations ecosystem.md (Arabic) --- web/pandas/ar/community/ecosystem.md | 738 +++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 web/pandas/ar/community/ecosystem.md diff --git a/web/pandas/ar/community/ecosystem.md b/web/pandas/ar/community/ecosystem.md new file mode 100644 index 000000000..f8e721d83 --- /dev/null +++ b/web/pandas/ar/community/ecosystem.md @@ -0,0 +1,738 @@ +# Ecosystem + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. The creation +of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a community-maintained list of projects that build on pandas in order +to provide tools in the PyData space. The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library. + +For a more complete list of projects that depend on pandas, see the libraries.io usage page for +pandas or search pypi for +pandas. + +We'd like to make it easier for users to find these projects, if you +know of other substantial projects that you feel should be on this list, +please let us know. + +## Statistics and machine learning + +### [Statsmodels](https://www.statsmodels.org/) + +Statsmodels is the prominent Python "statistics and econometrics +library" and it has a long-standing special relationship with pandas. +Statsmodels provides powerful statistics, econometrics, analysis and +modeling functionality that is out of pandas' scope. Statsmodels +leverages pandas objects as the underlying data container for +computation. + +### [skrub](https://skrub-data.org) + +Skrub facilitates machine learning on dataframes. It bridges pandas +to scikit-learn and related. In particular it facilitates building +features from dataframes. + +### [Featuretools](https://github.com/alteryx/featuretools/) + +Featuretools is a Python library for automated feature engineering built +on top of pandas. It excels at transforming temporal and relational +datasets into feature matrices for machine learning using reusable +feature engineering "primitives". Users can contribute their own +primitives in Python and share them with the rest of the community. + +### [Compose](https://github.com/alteryx/compose) + +Compose is a machine learning tool for labeling data and prediction engineering. +It allows you to structure the labeling process by parameterizing +prediction problems and transforming time-driven relational data into +target values with cutoff times that can be used for supervised learning. + +### [STUMPY](https://github.com/TDAmeritrade/stumpy) + +STUMPY is a powerful and scalable Python library for modern time series analysis. +At its core, STUMPY efficiently computes something called a +[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html), +which can be used for a wide variety of time series data mining tasks. 
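A minimal sketch of the matrix profile computation described above; the window size and the synthetic series are illustrative:

```python
import numpy as np
import stumpy

# A synthetic time series; in practice this is often a pandas Series' values
ts = np.cumsum(np.random.default_rng(0).standard_normal(1_000))

# Compute the matrix profile with a window size of 50 observations
mp = stumpy.stump(ts, m=50)

# The row with the smallest profile value locates the best-conserved motif
motif_idx = int(mp[:, 0].astype(float).argmin())
print(motif_idx, mp[motif_idx, 1])  # motif location and its nearest neighbor
```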
+ +## Visualization + +### [Altair](https://altair-viz.github.io/) + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with Pandas DataFrames. + +### [Bokeh](https://docs.bokeh.org) + +Bokeh is a Python interactive visualization library for large datasets +that natively uses the latest web technologies. Its goal is to provide +elegant, concise construction of novel graphics in the style of +Protovis/D3, while delivering high-performance interactivity over large +data to thin clients. + +[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a +high level API for Bokeh that can be loaded as a native Pandas plotting +backend via + +``` +pd.set_option("plotting.backend", "pandas_bokeh") +``` + +It is very similar to the matplotlib plotting backend, but provides +interactive web-based charts and maps. + +### [pygwalker](https://github.com/Kanaries/pygwalker) + +PyGWalker is an interactive data visualization and +exploratory data analysis tool built upon Graphic Walker +with support for visualization, cleaning, and annotation workflows. + +pygwalker can save interactively created charts +to Graphic-Walker and Vega-Lite JSON. + +``` +import pygwalker as pyg +pyg.walk(df) +``` + +### [seaborn](https://seaborn.pydata.org) + +Seaborn is a Python visualization library based on +[matplotlib](https://matplotlib.org). It provides a high-level, +dataset-oriented interface for creating attractive statistical graphics. +The plotting functions in seaborn understand pandas objects and leverage +pandas grouping operations internally to support concise specification +of complex visualizations. Seaborn also goes beyond matplotlib and +pandas with the option to perform statistical estimation while plotting, +aggregating across observations and visualizing the fit of statistical +models to emphasize patterns in a dataset. + +``` +import seaborn as sns +sns.set_theme() +``` + +### [plotnine](https://github.com/has2k1/plotnine/) + +Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a +foundational exploratory visualization package for the R language. Based +on "The Grammar of +Graphics" +it provides a powerful, declarative and extremely general way to +generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/). + +### [IPython Vega](https://github.com/vega/ipyvega) + +[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook. + +### [Plotly](https://plot.ly/python) + +[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/) +enables interactive figures and web shareability. Maps, 2D, 3D, and +live-streaming graphs are rendered with WebGL and +[D3.js](https://d3js.org/). The library supports plotting directly from +a pandas DataFrame and cloud-based collaboration. Users of matplotlib, +ggplot for Python, and +Seaborn can +convert figures into interactive web-based plots. Plots can be drawn in +[IPython Notebooks](https://plot.ly/ipython-notebooks/) , edited with R +or MATLAB, modified in a GUI, or embedded in apps and dashboards. 
Plotly +is free for unlimited sharing, and has cloud, offline, or on-premise +accounts for private use. + +### [Lux](https://github.com/lux-org/lux) + +Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas: + +```python +import lux +import pandas as pd + +df = pd.read_csv("data.csv") +df # discover interesting insights! +``` + +By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allow users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. + +### [D-Tale](https://github.com/man-group/dtale) + +D-Tale is a lightweight web client for visualizing pandas data structures. It +provides a rich spreadsheet-style grid which acts as a wrapper for a lot of +pandas functionality (query, sort, describe, corr...) so users can quickly +manipulate their data. There is also an interactive chart-builder using Plotly +Dash allowing users to build nice portable visualizations. D-Tale can be +invoked with the following command + +```python +import dtale + +dtale.show(df) +``` + +D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle +& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1). + +### [hvplot](https://hvplot.holoviz.org/index.html) + +hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/). +It can be loaded as a native pandas plotting backend via + +```python +pd.set_option("plotting.backend", "hvplot") +``` + +## IDE + +### [IPython](https://ipython.org/documentation.html) + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with Pandas methods and also +attributes like DataFrame columns. + +### [Jupyter Notebook / Jupyter Lab](https://jupyter.org) + +Jupyter Notebook is a web application for creating Jupyter notebooks. A +Jupyter notebook is a JSON document containing an ordered list of +input/output cells which can contain code, text, mathematics, plots and +rich media. Jupyter notebooks can be converted to a number of open +standard output formats (HTML, HTML presentation slides, LaTeX, PDF, +ReStructuredText, Markdown, Python) through 'Download As' in the web +interface and `jupyter convert` in a shell. + +Pandas DataFrames implement `_repr_html_` and `_repr_latex` methods which +are utilized by Jupyter Notebook for displaying (abbreviated) HTML or +LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may +or may not be compatible with non-HTML Jupyter output formats.) + +See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html) +for pandas `display.` settings. 
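For instance, two commonly adjusted `display.` options (the values here are illustrative):

```python
import pandas as pd

# Show up to 100 rows before truncating the repr, and round floats to 3 digits
pd.set_option("display.max_rows", 100)
pd.set_option("display.precision", 3)

print(pd.DataFrame({"x": [1.23456, 2.34567]}))
```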
+ +### [Spyder](https://www.spyder-ide.org/) + +Spyder is a cross-platform PyQt-based IDE combining the editing, +analysis, debugging and profiling functionality of a software +development tool with the data exploration, interactive execution, deep +inspection and rich visualization capabilities of a scientific +environment like MATLAB or Rstudio. + +Its Variable +Explorer allows +users to view, manipulate and edit pandas `Index`, `Series`, and +`DataFrame` objects like a "spreadsheet", including copying and +modifying values, sorting, displaying a "heatmap", converting data +types and more. Pandas objects can also be renamed, duplicated, new +columns added, copied/pasted to/from the clipboard (as TSV), and +saved/loaded to/from a file. Spyder can also import data from a variety +of plain text and binary files or the clipboard into a new pandas +DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython +Console, and Spyder's +[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and +render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. + +### [marimo](https://marimo.io) + +marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun: + +1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities. +2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes. +3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook. +4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns. +5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively. +6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory. + +## API + +### [pandas-datareader](https://github.com/pydata/pandas-datareader) + +`pandas-datareader` is a remote data access library for pandas +(PyPI:`pandas-datareader`). It is based on functionality that was +located in `pandas.io.data` and `pandas.io.wb` but was split off in +v0.19. See more in the pandas-datareader +docs: + +The following data feeds are available: + +- Google Finance +- Tiingo +- Morningstar +- IEX +- Robinhood +- Enigma +- Quandl +- FRED +- Fama/French +- World Bank +- OECD +- Eurostat +- TSP Fund Data +- Nasdaq Trader Symbol Definitions +- Stooq Index Data +- MOEX Data + +### [pandaSDMX](https://pandasdmx.readthedocs.io) + +pandaSDMX is a library to retrieve and acquire statistical data and +metadata disseminated in [SDMX](https://sdmx.org) 2.1, an +ISO-standard widely used by institutions such as statistics offices, +central banks, and international organisations. pandaSDMX can expose +datasets and related structural metadata including data flows, +code-lists, and data structure definitions as pandas Series or +MultiIndexed DataFrames. 
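A short sketch of the `pandas-datareader` pattern described above, pulling one of the listed feeds (FRED, via the general-purpose reader rather than the dedicated `fredapi` package below; the series code is illustrative):

```python
from datetime import datetime

import pandas_datareader.data as web

# Fetch the 10-year Treasury constant maturity rate from FRED as a DataFrame
start, end = datetime(2020, 1, 1), datetime(2020, 12, 31)
gs10 = web.DataReader("GS10", "fred", start, end)
print(gs10.head())
```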
+ +### [fredapi](https://github.com/mortada/fredapi) + +fredapi is a Python interface to the Federal Reserve Economic Data +(FRED) provided by the Federal Reserve +Bank of St. Louis. It works with both the FRED database and ALFRED +database that contains point-in-time data (i.e. historic data +revisions). fredapi provides a wrapper in Python to the FRED HTTP API, +and also provides several convenient methods for parsing and analyzing +point-in-time data from ALFRED. fredapi makes use of pandas and returns +data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. + +## Domain specific + +### [Geopandas](https://github.com/geopandas/geopandas) + +Geopandas extends pandas data objects to include geographic information +which support geometric operations. If your work entails maps and +geographical coordinates, and you love pandas, you should take a close +look at Geopandas. + +### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) + +gurobipy-pandas provides a convenient accessor API to connect pandas with +gurobipy. It enables users to more easily and efficiently build mathematical +optimization models from data stored in DataFrames and Series, and to read +solutions back directly as pandas objects. + +### [staircase](https://github.com/staircase-dev/staircase) + +staircase is a data analysis package, built upon pandas and numpy, for modelling and +manipulation of mathematical step functions. It provides a rich variety of arithmetic +operations, relational operations, logical operations, statistical operations and +aggregations for step functions defined over real numbers, datetime and timedelta domains. + +### [xarray](https://github.com/pydata/xarray) + +xarray brings the labeled data power of pandas to the physical sciences +by providing N-dimensional variants of the core pandas data structures. +It aims to provide a pandas-like and pandas-compatible toolkit for +analytics on multi-dimensional arrays, rather than the tabular data for +which pandas excels. + +## IO + +### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas) + +NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly. + +It supports the following data types: + +- pandas data types +- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm) +- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/) + +The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema). + +Example: + +```python +import ntv_pandas as npd + +jsn = df.npd.to_json(table=False) # save df as a JSON-value (format Table Schema if table is True else format NTV ) +df = npd.read_json(jsn) # load a JSON-value as a `DataFrame` + +df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not +``` + +### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas) + +BCPandas provides high performance writes from pandas to Microsoft SQL Server, +far exceeding the performance of the native `df.to_sql` method. Internally, it uses +Microsoft's BCP utility, but the complexity is fully abstracted away from the end user. +Rigorously tested, it is a complete replacement for `df.to_sql`. 
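And a sketch of the BCPandas write path just described; the connection details are placeholders, so consult the project README for the authoritative signatures:

```python
import pandas as pd
from bcpandas import SqlCreds, to_sql

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})

# Credentials for the target SQL Server database (placeholder values)
creds = SqlCreds("my_server", "my_database", "username", "password")

# Bulk-load the DataFrame via the BCP utility instead of row-by-row inserts
to_sql(df, "my_table", creds, index=False, if_exists="replace")
```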
+
+### [Deltalake](https://pypi.org/project/deltalake)
+
+The Deltalake Python package lets you access tables stored in
+[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or
+the JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert
+any Delta table into a pandas DataFrame.
+
+### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas)
+
+pandas-gbq provides high performance reads and writes to and from
+[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0),
+these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`.
+Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead.
+
+### [ArcticDB](https://github.com/man-group/ArcticDB)
+
+ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/).
+
+#### ArcticDB Terminology
+
+ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components:
+
+- `Object Store` Collections of libraries. Used to separate logical environments from each other. Analogous to a database server.
+- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database.
+- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables.
+- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object.
+
+#### Installation
+
+To install, simply run:
+
+```console
+pip install arcticdb
+```
+
+To get started, we can import ArcticDB and instantiate it:
+
+```python
+import arcticdb as adb
+import numpy as np
+import pandas as pd
+# this will set up the storage using the local file system
+arctic = adb.Arctic("lmdb://arcticdb_test")
+```
+
+> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\
+> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`.
+
+#### Library Setup
+
+ArcticDB is geared towards storing many tables (potentially millions). Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must be initialized prior to use:
+
+```python
+lib = arctic.get_library('sample', create_if_missing=True)
+```
+
+#### Writing Data to ArcticDB
+
+Now that we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage.
+
+```python
+df = pd.DataFrame(
+    {
+        "a": list("abc"),
+        "b": list(range(1, 4)),
+        "c": np.arange(3, 6).astype("u1"),
+        "d": np.arange(4.0, 7.0, dtype="float64"),
+        "e": [True, False, True],
+        "f": pd.date_range("20130101", periods=3)
+    }
+)
+
+df
+df.dtypes
+```
+
+Write to ArcticDB.
+
+```python
+write_record = lib.write("test", df)
+```
+
+> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types:
+>
+> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index)
+> - `RangeIndex`
+> - `DatetimeIndex`
+> - `MultiIndex` composed of the above supported types
+>
+> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc').
+
+#### Reading Data from ArcticDB
+
+Read the data back from storage:
+
+```python
+read_record = lib.read("test")
+read_record.data
+df.dtypes
+```
+
+ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder).
+
+### [Hugging Face](https://huggingface.co/datasets)
+
+The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library.
+
+You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`.
+
+For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb):
+
+```python
+import pandas as pd
+
+# Load the IMDB dataset
+df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet")
+```
+
+Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas.
+
+To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`:
+
+```python
+# Save the dataset to my Hugging Face account
+df.to_parquet("hf://datasets/username/dataset_name/train.parquet")
+```
+
+You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets).
+
+## Out-of-core
+
+### [Bodo](https://github.com/bodo-ai/Bodo)
+
+Bodo is a high-performance compute engine for Python data processing.
+Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas
+workloads from laptops to clusters without major code changes.
+Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology, making it
+both easier to use and often much faster than alternatives.
+Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently.
+
+```python
+import pandas as pd
+import bodo
+
+@bodo.jit
+def process_data():
+    df = pd.read_parquet("my_data.pq")
+    df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)})
+    df2.to_parquet("out.pq")
+
+process_data()
+```
+
+### [Cylon](https://cylondata.org/)
+
+Cylon is a fast, scalable, distributed memory parallel runtime with a pandas-like
+Python DataFrame API. "Core Cylon" is implemented with C++ using Apache
+Arrow format to represent the data in-memory. Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. 
These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can be run in multiple engines. It helps bridge the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multiple cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv") # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for out-of-core DataFrames (similar to pandas), to
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count, standard deviation etc., on an N-dimensional
+grid at up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a zero
+memory copy policy and lazy computations for best performance (no memory
+wasted).
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
+
+### [Hail Query](https://hail.is/)
+
+An out-of-core, preemptible-safe, distributed dataframe library serving
+the genetics community. Hail Query ships with on-disk data formats,
+in-memory data formats, an expression compiler, a query planner, and a
+distributed sort algorithm all designed to accelerate queries on large
+matrices of genome sequencing data.
+
+It is often easiest to use pandas to manipulate the summary statistics or
+other small aggregates produced by Hail. For this reason, Hail provides
+native conversion to and from pandas DataFrames:
+
+- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas)
+- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas)
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
+
+### [Pandera](https://pandera.readthedocs.io/en/stable/)
+
+Pandera provides a flexible and expressive API for performing data validation on dataframes
+to make data processing pipelines more readable and robust.
+Dataframes contain information that pandera explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings.
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system.
+The following libraries implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [awkward-pandas](https://github.com/scikit-hep/awkward)
+
+Awkward-pandas provides an extension type for storing Awkward
+Arrays inside pandas' Series and
+DataFrame. It also provides an accessor for using awkward functions
+on Series that are of awkward type.
+
+### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
+
+db-dtypes provides extension types for working with types like
+DATE, TIME, and JSON from database systems. This package is used
+by pandas-gbq to provide natural dtypes for BigQuery data types without
+a natural numpy type.
+
+### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
+
+Pandas-Genomics provides an extension type and extension array for working
+with genomics data. It also includes `genomics` accessors for many useful properties
+and methods related to QC and analysis of genomics data.
+
+### [Physipandas](https://github.com/mocquin/physipandas)
+
+Physipandas provides an extension for manipulating physical quantities
+(like scalars and numpy.ndarray) in association with a physical unit
+(like meter or joule) and additional features for integration of
+`physipy` accessors with pandas Series and DataFrame.
+
+### [Pint-Pandas](https://github.com/hgrecco/pint-pandas)
+
+Pint-Pandas provides an extension type for storing numeric arrays with units.
+These arrays can be stored inside pandas' Series and DataFrame. Operations
+between Series and DataFrame columns which use pint's extension array are then
+units aware.
+
+### [Text Extensions](https://ibm.biz/text-extensions-for-pandas)
+
+Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames.
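+
+As a quick illustration of how these extension types plug into pandas
+containers, here is a minimal sketch using Pint-Pandas (assuming the
+`pint-pandas` package is installed; importing it registers the `pint[...]`
+dtype):
+
+```python
+import pandas as pd
+import pint_pandas  # importing registers the "pint[...]" extension dtype
+
+# Series whose values carry physical units
+distance = pd.Series([1.0, 2.0, 3.0], dtype="pint[meter]")
+time = pd.Series([10.0, 20.0, 30.0], dtype="pint[second]")
+
+speed = distance / time  # units-aware result (meters per second)
+```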
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace.
+
+| Library | Accessor | Classes |
+| -------------------------------------------------------------------- | ---------- | --------------------- |
+| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` |
+| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` |
+| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` |
+| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` |
+| [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` |
+| [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` |
+| [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` |
+| [woodwork](https://github.com/alteryx/woodwork) | `ww` | `Series`, `DataFrame` |
+
+## Development tools
+
+### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs)
+
+While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of the pandas API.
+
+Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468),
+[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142).
+
+See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs).
+
+### [Hamilton](https://github.com/dagworks-inc/hamilton)
+
+Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a
+Pandas code base, specifically with respect to feature engineering for machine learning models.
+
+It prescribes an opinionated paradigm that ensures all code is:
+
+- unit testable
+- integration testing friendly
+- documentation friendly
+- reusable, since transformation logic is decoupled from the context in which it is used
+- integrable with runtime data quality checks
+
+This helps you scale your pandas code base while keeping maintenance costs low.
+
+For more information, see [documentation](https://hamilton.readthedocs.io/).
From d2e1560cc338183cf853995f35b33d64615fb08d Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:08:54 -0500
Subject: [PATCH 114/184] New translations ecosystem.md (Catalan)

---
 web/pandas/ca/community/ecosystem.md | 738 +++++++++++++++++++++++++++
 1 file changed, 738 insertions(+)
 create mode 100644 web/pandas/ca/community/ecosystem.md

diff --git a/web/pandas/ca/community/ecosystem.md b/web/pandas/ca/community/ecosystem.md
new file mode 100644
index 000000000..f8e721d83
--- /dev/null
+++ b/web/pandas/ca/community/ecosystem.md
@@ -0,0 +1,738 @@
+# Ecosystem
+
+Increasingly, packages are being built on top of pandas to address
+specific needs in data preparation, analysis and visualization. This is
+encouraging because it means pandas is not only helping users to handle
+their data tasks but also that it provides a better starting point for
+developers to build powerful and more focused data tools. 
The creation
+of libraries that complement pandas' functionality also allows pandas
+development to remain focused around its original requirements.
+
+This is a community-maintained list of projects that build on pandas in order
+to provide tools in the PyData space. The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library.
+
+For a more complete list of projects that depend on pandas, see the libraries.io usage page for
+pandas or search pypi for
+pandas.
+
+We'd like to make it easier for users to find these projects. If you
+know of other substantial projects that you feel should be on this list,
+please let us know.
+
+## Statistics and machine learning
+
+### [Statsmodels](https://www.statsmodels.org/)
+
+Statsmodels is the prominent Python "statistics and econometrics
+library" and it has a long-standing special relationship with pandas.
+Statsmodels provides powerful statistics, econometrics, analysis and
+modeling functionality that is out of pandas' scope. Statsmodels
+leverages pandas objects as the underlying data container for
+computation.
+
+### [skrub](https://skrub-data.org)
+
+Skrub facilitates machine learning on dataframes. It bridges pandas
+to scikit-learn and related libraries. In particular it facilitates building
+features from dataframes.
+
+### [Featuretools](https://github.com/alteryx/featuretools/)
+
+Featuretools is a Python library for automated feature engineering built
+on top of pandas. It excels at transforming temporal and relational
+datasets into feature matrices for machine learning using reusable
+feature engineering "primitives". Users can contribute their own
+primitives in Python and share them with the rest of the community.
+
+### [Compose](https://github.com/alteryx/compose)
+
+Compose is a machine learning tool for labeling data and prediction engineering.
+It allows you to structure the labeling process by parameterizing
+prediction problems and transforming time-driven relational data into
+target values with cutoff times that can be used for supervised learning.
+
+### [STUMPY](https://github.com/TDAmeritrade/stumpy)
+
+STUMPY is a powerful and scalable Python library for modern time series analysis.
+At its core, STUMPY efficiently computes something called a
+[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html),
+which can be used for a wide variety of time series data mining tasks.
+
+## Visualization
+
+### [Altair](https://altair-viz.github.io/)
+
+Altair is a declarative statistical visualization library for Python.
+With Altair, you can spend more time understanding your data and its
+meaning. Altair's API is simple, friendly and consistent and built on
+top of the powerful Vega-Lite JSON specification. This elegant
+simplicity produces beautiful and effective visualizations with a
+minimal amount of code. Altair works with Pandas DataFrames.
+
+### [Bokeh](https://docs.bokeh.org)
+
+Bokeh is a Python interactive visualization library for large datasets
+that natively uses the latest web technologies. Its goal is to provide
+elegant, concise construction of novel graphics in the style of
+Protovis/D3, while delivering high-performance interactivity over large
+data to thin clients.
+
+[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a
+high-level API for Bokeh that can be loaded as a native Pandas plotting
+backend via
+
+```
+pd.set_option("plotting.backend", "pandas_bokeh")
+```
+
+It is very similar to the matplotlib plotting backend, but provides
+interactive web-based charts and maps.
+
+### [pygwalker](https://github.com/Kanaries/pygwalker)
+
+PyGWalker is an interactive data visualization and
+exploratory data analysis tool built upon Graphic Walker
+with support for visualization, cleaning, and annotation workflows.
+
+pygwalker can save interactively created charts
+to Graphic-Walker and Vega-Lite JSON.
+
+```
+import pygwalker as pyg
+pyg.walk(df)
+```
+
+### [seaborn](https://seaborn.pydata.org)
+
+Seaborn is a Python visualization library based on
+[matplotlib](https://matplotlib.org). It provides a high-level,
+dataset-oriented interface for creating attractive statistical graphics.
+The plotting functions in seaborn understand pandas objects and leverage
+pandas grouping operations internally to support concise specification
+of complex visualizations. Seaborn also goes beyond matplotlib and
+pandas with the option to perform statistical estimation while plotting,
+aggregating across observations and visualizing the fit of statistical
+models to emphasize patterns in a dataset.
+
+```
+import seaborn as sns
+sns.set_theme()
+```
+
+### [plotnine](https://github.com/has2k1/plotnine/)
+
+Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a
+foundational exploratory visualization package for the R language. Based
+on "The Grammar of Graphics",
+it provides a powerful, declarative and extremely general way to
+generate bespoke plots of any kind of data.
+Various implementations for other languages are available.
+A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/).
+
+### [IPython Vega](https://github.com/vega/ipyvega)
+
+[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook.
+
+### [Plotly](https://plot.ly/python)
+
+[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/)
+enables interactive figures and web shareability. Maps, 2D, 3D, and
+live-streaming graphs are rendered with WebGL and
+[D3.js](https://d3js.org/). The library supports plotting directly from
+a pandas DataFrame and cloud-based collaboration. Users of matplotlib,
+ggplot for Python, and
+Seaborn can
+convert figures into interactive web-based plots. Plots can be drawn in
+[IPython Notebooks](https://plot.ly/ipython-notebooks/), edited with R
+or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly
+is free for unlimited sharing, and has cloud, offline, or on-premise
+accounts for private use.
+
+### [Lux](https://github.com/lux-org/lux)
+
+Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas:
+
+```python
+import lux
+import pandas as pd
+
+df = pd.read_csv("data.csv")
+df # discover interesting insights!
+```
+
+By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. 
Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allows users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code.
+
+### [D-Tale](https://github.com/man-group/dtale)
+
+D-Tale is a lightweight web client for visualizing pandas data structures. It
+provides a rich spreadsheet-style grid which acts as a wrapper for a lot of
+pandas functionality (query, sort, describe, corr...) so users can quickly
+manipulate their data. There is also an interactive chart-builder using Plotly
+Dash allowing users to build nice portable visualizations. D-Tale can be
+invoked with the following command:
+
+```python
+import dtale
+
+dtale.show(df)
+```
+
+D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle
+& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1).
+
+### [hvplot](https://hvplot.holoviz.org/index.html)
+
+hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/).
+It can be loaded as a native pandas plotting backend via
+
+```python
+pd.set_option("plotting.backend", "hvplot")
+```
+
+## IDE
+
+### [IPython](https://ipython.org/documentation.html)
+
+IPython is an interactive command shell and distributed computing
+environment. IPython tab completion works with Pandas methods and also
+attributes like DataFrame columns.
+
+### [Jupyter Notebook / Jupyter Lab](https://jupyter.org)
+
+Jupyter Notebook is a web application for creating Jupyter notebooks. A
+Jupyter notebook is a JSON document containing an ordered list of
+input/output cells which can contain code, text, mathematics, plots and
+rich media. Jupyter notebooks can be converted to a number of open
+standard output formats (HTML, HTML presentation slides, LaTeX, PDF,
+ReStructuredText, Markdown, Python) through 'Download As' in the web
+interface and `jupyter nbconvert` in a shell.
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
+
+See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html)
+for pandas `display.` settings.
+
+### [Spyder](https://www.spyder-ide.org/)
+
+Spyder is a cross-platform PyQt-based IDE combining the editing,
+analysis, debugging and profiling functionality of a software
+development tool with the data exploration, interactive execution, deep
+inspection and rich visualization capabilities of a scientific
+environment like MATLAB or RStudio.
+
+Its Variable
+Explorer allows
+users to view, manipulate and edit pandas `Index`, `Series`, and
+`DataFrame` objects like a "spreadsheet", including copying and
+modifying values, sorting, displaying a "heatmap", converting data
+types and more. Pandas objects can also be renamed, duplicated, new
+columns added, copied/pasted to/from the clipboard (as TSV), and
+saved/loaded to/from a file. Spyder can also import data from a variety
+of plain text and binary files or the clipboard into a new pandas
+DataFrame via a sophisticated import wizard.
+
+Most pandas classes, methods and data attributes can be autocompleted in
+Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython
+Console, and Spyder's
+[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and
+render Numpydoc documentation on pandas objects in rich text with Sphinx
+both automatically and on-demand.
+
+### [marimo](https://marimo.io)
+
+marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun:
+
+1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities.
+2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes.
+3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook.
+4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns.
+5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively.
+6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory.
+
+## API
+
+### [pandas-datareader](https://github.com/pydata/pandas-datareader)
+
+`pandas-datareader` is a remote data access library for pandas
+(PyPI:`pandas-datareader`). It is based on functionality that was
+located in `pandas.io.data` and `pandas.io.wb` but was split off in
+v0.19. See more in the
+[pandas-datareader docs](https://pandas-datareader.readthedocs.io/en/latest/).
+
+The following data feeds are available:
+
+- Google Finance
+- Tiingo
+- Morningstar
+- IEX
+- Robinhood
+- Enigma
+- Quandl
+- FRED
+- Fama/French
+- World Bank
+- OECD
+- Eurostat
+- TSP Fund Data
+- Nasdaq Trader Symbol Definitions
+- Stooq Index Data
+- MOEX Data
+
+### [pandaSDMX](https://pandasdmx.readthedocs.io)
+
+pandaSDMX is a library to retrieve statistical data and
+metadata disseminated in [SDMX](https://sdmx.org) 2.1, an
+ISO standard widely used by institutions such as statistics offices,
+central banks, and international organisations. pandaSDMX can expose
+datasets and related structural metadata including data flows,
+code-lists, and data structure definitions as pandas Series or
+MultiIndexed DataFrames.
+
+### [fredapi](https://github.com/mortada/fredapi)
+
+fredapi is a Python interface to the Federal Reserve Economic Data
+(FRED) provided by the Federal Reserve
+Bank of St. Louis. It works with both the FRED database and the ALFRED
+database, which contains point-in-time data (i.e. historic data
+revisions). fredapi provides a wrapper in Python to the FRED HTTP API,
+and also provides several convenient methods for parsing and analyzing
+point-in-time data from ALFRED. fredapi makes use of pandas and returns
+data in a Series or DataFrame. This module requires a FRED API key that
+you can obtain for free on the FRED website.
+
+## Domain specific
+
+### [Geopandas](https://github.com/geopandas/geopandas)
+
+Geopandas extends pandas data objects to include geographic information
+which supports geometric operations. If your work entails maps and
+geographical coordinates, and you love pandas, you should take a close
+look at Geopandas.
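+
+A small sketch of the kind of workflow GeoPandas enables (the file path and
+column names are placeholders):
+
+```python
+import geopandas as gpd
+
+# Read any OGR-supported vector file (shapefile, GeoPackage, GeoJSON, ...)
+gdf = gpd.read_file("regions.gpkg")  # placeholder path
+
+# Regular pandas operations still work; the geometry column adds spatial ones
+gdf["area"] = gdf.geometry.area
+largest = gdf.sort_values("area", ascending=False).head(5)
+```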
+
+### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)
+
+gurobipy-pandas provides a convenient accessor API to connect pandas with
+gurobipy. It enables users to more easily and efficiently build mathematical
+optimization models from data stored in DataFrames and Series, and to read
+solutions back directly as pandas objects.
+
+### [staircase](https://github.com/staircase-dev/staircase)
+
+staircase is a data analysis package, built upon pandas and numpy, for modelling and
+manipulation of mathematical step functions. It provides a rich variety of arithmetic
+operations, relational operations, logical operations, statistical operations and
+aggregations for step functions defined over real numbers, datetime and timedelta domains.
+
+### [xarray](https://github.com/pydata/xarray)
+
+xarray brings the labeled data power of pandas to the physical sciences
+by providing N-dimensional variants of the core pandas data structures.
+It aims to provide a pandas-like and pandas-compatible toolkit for
+analytics on multi-dimensional arrays, rather than the tabular data for
+which pandas excels.
+
+## IO
+
+### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas)
+
+NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly.
+
+It supports the following data types:
+
+- pandas data types
+- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm)
+- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/)
+
+The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema).
+
+Example:
+
+```python
+import ntv_pandas as npd
+
+jsn = df.npd.to_json(table=False) # save df as a JSON value (Table Schema format if table is True, NTV format otherwise)
+df = npd.read_json(jsn) # load a JSON value as a `DataFrame`
+
+df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not
+```
+
+### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas)
+
+BCPandas provides high performance writes from pandas to Microsoft SQL Server,
+far exceeding the performance of the native `df.to_sql` method. Internally, it uses
+Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
+Rigorously tested, it is a complete replacement for `df.to_sql`.
+
+### [Deltalake](https://pypi.org/project/deltalake)
+
+The Deltalake Python package lets you access tables stored in
+[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or
+the JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert
+any Delta table into a pandas DataFrame.
+
+### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas)
+
+pandas-gbq provides high performance reads and writes to and from
+[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0),
+these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`.
+Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead.
+
+### [ArcticDB](https://github.com/man-group/ArcticDB)
+
+ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. 
ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/).
+
+#### ArcticDB Terminology
+
+ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components:
+
+- `Object Store` Collections of libraries. Used to separate logical environments from each other. Analogous to a database server.
+- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database.
+- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables.
+- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object.
+
+#### Installation
+
+To install, simply run:
+
+```console
+pip install arcticdb
+```
+
+To get started, we can import ArcticDB and instantiate it:
+
+```python
+import arcticdb as adb
+import numpy as np
+import pandas as pd
+# this will set up the storage using the local file system
+arctic = adb.Arctic("lmdb://arcticdb_test")
+```
+
+> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\
+> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`.
+
+#### Library Setup
+
+ArcticDB is geared towards storing many tables (potentially millions). Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must be initialized prior to use:
+
+```python
+lib = arctic.get_library('sample', create_if_missing=True)
+```
+
+#### Writing Data to ArcticDB
+
+Now that we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage.
+
+```python
+df = pd.DataFrame(
+    {
+        "a": list("abc"),
+        "b": list(range(1, 4)),
+        "c": np.arange(3, 6).astype("u1"),
+        "d": np.arange(4.0, 7.0, dtype="float64"),
+        "e": [True, False, True],
+        "f": pd.date_range("20130101", periods=3)
+    }
+)
+
+df
+df.dtypes
+```
+
+Write to ArcticDB.
+
+```python
+write_record = lib.write("test", df)
+```
+
+> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types:
+>
+> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index)
+> - `RangeIndex`
+> - `DatetimeIndex`
+> - `MultiIndex` composed of the above supported types
+>
+> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc').
+
+#### Reading Data from ArcticDB
+
+Read the data back from storage:
+
+```python
+read_record = lib.read("test")
+read_record.data
+df.dtypes
+```
+
+ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder).
+
+### [Hugging Face](https://huggingface.co/datasets)
+
+The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. 
The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library.
+
+You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`.
+
+For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb):
+
+```python
+import pandas as pd
+
+# Load the IMDB dataset
+df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet")
+```
+
+Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas.
+
+To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`:
+
+```python
+# Save the dataset to my Hugging Face account
+df.to_parquet("hf://datasets/username/dataset_name/train.parquet")
+```
+
+You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets).
+
+## Out-of-core
+
+### [Bodo](https://github.com/bodo-ai/Bodo)
+
+Bodo is a high-performance compute engine for Python data processing.
+Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas
+workloads from laptops to clusters without major code changes.
+Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology, making it
+both easier to use and often much faster than alternatives.
+Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently.
+
+```python
+import pandas as pd
+import bodo
+
+@bodo.jit
+def process_data():
+    df = pd.read_parquet("my_data.pq")
+    df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)})
+    df2.to_parquet("out.pq")
+
+process_data()
+```
+
+### [Cylon](https://cylondata.org/)
+
+Cylon is a fast, scalable, distributed memory parallel runtime with a pandas-like
+Python DataFrame API. "Core Cylon" is implemented with C++ using Apache
+Arrow format to represent the data in-memory. Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
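+
+A minimal sketch of the Dask DataFrame API mirroring pandas (the file pattern
+and column names are placeholders):
+
+```python
+import dask.dataframe as dd
+
+# Lazily read many CSV files into one partitioned DataFrame
+ddf = dd.read_csv("data/2024-*.csv")  # placeholder glob pattern
+
+# Familiar pandas-style operations build a task graph...
+result = ddf.groupby("category")["amount"].mean()
+
+# ...which only executes, in parallel, on compute()
+print(result.compute())
+```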
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can be run in multiple engines. It helps bridge the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multiple cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv") # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for out-of-core DataFrames (similar to pandas), to
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count, standard deviation etc., on an N-dimensional
+grid at up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a zero
+memory copy policy and lazy computations for best performance (no memory
+wasted).
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
+
+### [Hail Query](https://hail.is/)
+
+An out-of-core, preemptible-safe, distributed dataframe library serving
+the genetics community. Hail Query ships with on-disk data formats,
+in-memory data formats, an expression compiler, a query planner, and a
+distributed sort algorithm all designed to accelerate queries on large
+matrices of genome sequencing data.
+
+It is often easiest to use pandas to manipulate the summary statistics or
+other small aggregates produced by Hail. For this reason, Hail provides
+native conversion to and from pandas DataFrames:
+
+- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas)
+- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas)
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
+
+### [Pandera](https://pandera.readthedocs.io/en/stable/)
+
+Pandera provides a flexible and expressive API for performing data validation on dataframes
+to make data processing pipelines more readable and robust.
+Dataframes contain information that pandera explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings.
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system.
+The following libraries implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [awkward-pandas](https://github.com/scikit-hep/awkward)
+
+Awkward-pandas provides an extension type for storing Awkward
+Arrays inside pandas' Series and
+DataFrame. It also provides an accessor for using awkward functions
+on Series that are of awkward type.
+
+### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
+
+db-dtypes provides extension types for working with types like
+DATE, TIME, and JSON from database systems. This package is used
+by pandas-gbq to provide natural dtypes for BigQuery data types without
+a natural numpy type.
+
+### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
+
+Pandas-Genomics provides an extension type and extension array for working
+with genomics data. It also includes `genomics` accessors for many useful properties
+and methods related to QC and analysis of genomics data.
+
+### [Physipandas](https://github.com/mocquin/physipandas)
+
+Physipandas provides an extension for manipulating physical quantities
+(like scalars and numpy.ndarray) in association with a physical unit
+(like meter or joule) and additional features for integration of
+`physipy` accessors with pandas Series and DataFrame.
+
+### [Pint-Pandas](https://github.com/hgrecco/pint-pandas)
+
+Pint-Pandas provides an extension type for storing numeric arrays with units.
+These arrays can be stored inside pandas' Series and DataFrame. Operations
+between Series and DataFrame columns which use pint's extension array are then
+units aware.
+
+### [Text Extensions](https://ibm.biz/text-extensions-for-pandas)
+
+Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames.
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace.
+
+| Library | Accessor | Classes |
+| -------------------------------------------------------------------- | ---------- | --------------------- |
+| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` |
+| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` |
+| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` |
+| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` |
+| [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` |
+| [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` |
+| [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` |
+| [woodwork](https://github.com/alteryx/woodwork) | `ww` | `Series`, `DataFrame` |
+
+## Development tools
+
+### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs)
+
+While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of the pandas API.
+
+Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468),
+[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142).
+
+See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs).
+
+### [Hamilton](https://github.com/dagworks-inc/hamilton)
+
+Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a
+Pandas code base, specifically with respect to feature engineering for machine learning models.
+
+It prescribes an opinionated paradigm that ensures all code is:
+
+- unit testable
+- integration testing friendly
+- documentation friendly
+- reusable, since transformation logic is decoupled from the context in which it is used
+- integrable with runtime data quality checks
+
+This helps you scale your pandas code base while keeping maintenance costs low.
+
+For more information, see [documentation](https://hamilton.readthedocs.io/).
From f31c8a393f81038577a246c0ccd0fa6521c1cdc3 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:08:56 -0500
Subject: [PATCH 115/184] New translations ecosystem.md (Japanese)

---
 web/pandas/ja/community/ecosystem.md | 738 +++++++++++++++++++++++++++
 1 file changed, 738 insertions(+)
 create mode 100644 web/pandas/ja/community/ecosystem.md

diff --git a/web/pandas/ja/community/ecosystem.md b/web/pandas/ja/community/ecosystem.md
new file mode 100644
index 000000000..f8e721d83
--- /dev/null
+++ b/web/pandas/ja/community/ecosystem.md
@@ -0,0 +1,738 @@
+# Ecosystem
+
+Increasingly, packages are being built on top of pandas to address
+specific needs in data preparation, analysis and visualization. This is
+encouraging because it means pandas is not only helping users to handle
+their data tasks but also that it provides a better starting point for
+developers to build powerful and more focused data tools. The creation
+of libraries that complement pandas' functionality also allows pandas
+development to remain focused around its original requirements.
+
+This is a community-maintained list of projects that build on pandas in order
+to provide tools in the PyData space. 
The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library.
+
+For a more complete list of projects that depend on pandas, see the libraries.io usage page for
+pandas or search pypi for
+pandas.
+
+We'd like to make it easier for users to find these projects. If you
+know of other substantial projects that you feel should be on this list,
+please let us know.
+
+## Statistics and machine learning
+
+### [Statsmodels](https://www.statsmodels.org/)
+
+Statsmodels is the prominent Python "statistics and econometrics
+library" and it has a long-standing special relationship with pandas.
+Statsmodels provides powerful statistics, econometrics, analysis and
+modeling functionality that is out of pandas' scope. Statsmodels
+leverages pandas objects as the underlying data container for
+computation.
+
+### [skrub](https://skrub-data.org)
+
+Skrub facilitates machine learning on dataframes. It bridges pandas
+to scikit-learn and related libraries. In particular it facilitates building
+features from dataframes.
+
+### [Featuretools](https://github.com/alteryx/featuretools/)
+
+Featuretools is a Python library for automated feature engineering built
+on top of pandas. It excels at transforming temporal and relational
+datasets into feature matrices for machine learning using reusable
+feature engineering "primitives". Users can contribute their own
+primitives in Python and share them with the rest of the community.
+
+### [Compose](https://github.com/alteryx/compose)
+
+Compose is a machine learning tool for labeling data and prediction engineering.
+It allows you to structure the labeling process by parameterizing
+prediction problems and transforming time-driven relational data into
+target values with cutoff times that can be used for supervised learning.
+
+### [STUMPY](https://github.com/TDAmeritrade/stumpy)
+
+STUMPY is a powerful and scalable Python library for modern time series analysis.
+At its core, STUMPY efficiently computes something called a
+[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html),
+which can be used for a wide variety of time series data mining tasks.
+
+## Visualization
+
+### [Altair](https://altair-viz.github.io/)
+
+Altair is a declarative statistical visualization library for Python.
+With Altair, you can spend more time understanding your data and its
+meaning. Altair's API is simple, friendly and consistent and built on
+top of the powerful Vega-Lite JSON specification. This elegant
+simplicity produces beautiful and effective visualizations with a
+minimal amount of code. Altair works with Pandas DataFrames.
+
+### [Bokeh](https://docs.bokeh.org)
+
+Bokeh is a Python interactive visualization library for large datasets
+that natively uses the latest web technologies. Its goal is to provide
+elegant, concise construction of novel graphics in the style of
+Protovis/D3, while delivering high-performance interactivity over large
+data to thin clients.
+
+[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a
+high-level API for Bokeh that can be loaded as a native Pandas plotting
+backend via
+
+```
+pd.set_option("plotting.backend", "pandas_bokeh")
+```
+
+It is very similar to the matplotlib plotting backend, but provides
+interactive web-based charts and maps.
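+
+A minimal sketch of the backend in action (assuming the `pandas-bokeh`
+package is installed; the data is made up):
+
+```python
+import pandas as pd
+
+pd.set_option("plotting.backend", "pandas_bokeh")  # pandas loads the backend by name
+
+df = pd.DataFrame({"x": list(range(10)), "y": [v ** 2 for v in range(10)]})
+df.plot(kind="line")  # now renders an interactive Bokeh chart
+```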
+
+### [pygwalker](https://github.com/Kanaries/pygwalker)
+
+PyGWalker is an interactive data visualization and
+exploratory data analysis tool built upon Graphic Walker
+with support for visualization, cleaning, and annotation workflows.
+
+pygwalker can save interactively created charts
+to Graphic-Walker and Vega-Lite JSON.
+
+```
+import pygwalker as pyg
+pyg.walk(df)
+```
+
+### [seaborn](https://seaborn.pydata.org)
+
+Seaborn is a Python visualization library based on
+[matplotlib](https://matplotlib.org). It provides a high-level,
+dataset-oriented interface for creating attractive statistical graphics.
+The plotting functions in seaborn understand pandas objects and leverage
+pandas grouping operations internally to support concise specification
+of complex visualizations. Seaborn also goes beyond matplotlib and
+pandas with the option to perform statistical estimation while plotting,
+aggregating across observations and visualizing the fit of statistical
+models to emphasize patterns in a dataset.
+
+```
+import seaborn as sns
+sns.set_theme()
+```
+
+### [plotnine](https://github.com/has2k1/plotnine/)
+
+Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a
+foundational exploratory visualization package for the R language. Based
+on "The Grammar of Graphics",
+it provides a powerful, declarative and extremely general way to
+generate bespoke plots of any kind of data.
+Various implementations for other languages are available.
+A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/).
+
+### [IPython Vega](https://github.com/vega/ipyvega)
+
+[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook.
+
+### [Plotly](https://plot.ly/python)
+
+[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/)
+enables interactive figures and web shareability. Maps, 2D, 3D, and
+live-streaming graphs are rendered with WebGL and
+[D3.js](https://d3js.org/). The library supports plotting directly from
+a pandas DataFrame and cloud-based collaboration. Users of matplotlib,
+ggplot for Python, and
+Seaborn can
+convert figures into interactive web-based plots. Plots can be drawn in
+[IPython Notebooks](https://plot.ly/ipython-notebooks/), edited with R
+or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly
+is free for unlimited sharing, and has cloud, offline, or on-premise
+accounts for private use.
+
+### [Lux](https://github.com/lux-org/lux)
+
+Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas:
+
+```python
+import lux
+import pandas as pd
+
+df = pd.read_csv("data.csv")
+df # discover interesting insights!
+```
+
+By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allows users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code.
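+
+A small hedged sketch of steering Lux's recommendations through its intent
+mechanism (the column names are placeholders; see the Lux docs for the exact
+semantics of `df.intent`):
+
+```python
+import lux
+import pandas as pd
+
+df = pd.read_csv("data.csv")
+
+# Declare the attributes you care about; Lux centers its recommended
+# visualizations on them the next time the dataframe is displayed
+df.intent = ["AverageCost", "Region"]  # placeholder column names
+df
+```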
+ +### [D-Tale](https://github.com/man-group/dtale) + +D-Tale is a lightweight web client for visualizing pandas data structures. It +provides a rich spreadsheet-style grid which acts as a wrapper for a lot of +pandas functionality (query, sort, describe, corr...) so users can quickly +manipulate their data. There is also an interactive chart-builder using Plotly +Dash allowing users to build nice portable visualizations. D-Tale can be +invoked with the following command + +```python +import dtale + +dtale.show(df) +``` + +D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle +& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1). + +### [hvplot](https://hvplot.holoviz.org/index.html) + +hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/). +It can be loaded as a native pandas plotting backend via + +```python +pd.set_option("plotting.backend", "hvplot") +``` + +## IDE + +### [IPython](https://ipython.org/documentation.html) + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with Pandas methods and also +attributes like DataFrame columns. + +### [Jupyter Notebook / Jupyter Lab](https://jupyter.org) + +Jupyter Notebook is a web application for creating Jupyter notebooks. A +Jupyter notebook is a JSON document containing an ordered list of +input/output cells which can contain code, text, mathematics, plots and +rich media. Jupyter notebooks can be converted to a number of open +standard output formats (HTML, HTML presentation slides, LaTeX, PDF, +ReStructuredText, Markdown, Python) through 'Download As' in the web +interface and `jupyter convert` in a shell. + +Pandas DataFrames implement `_repr_html_` and `_repr_latex` methods which +are utilized by Jupyter Notebook for displaying (abbreviated) HTML or +LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may +or may not be compatible with non-HTML Jupyter output formats.) + +See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html) +for pandas `display.` settings. + +### [Spyder](https://www.spyder-ide.org/) + +Spyder is a cross-platform PyQt-based IDE combining the editing, +analysis, debugging and profiling functionality of a software +development tool with the data exploration, interactive execution, deep +inspection and rich visualization capabilities of a scientific +environment like MATLAB or Rstudio. + +Its Variable +Explorer allows +users to view, manipulate and edit pandas `Index`, `Series`, and +`DataFrame` objects like a "spreadsheet", including copying and +modifying values, sorting, displaying a "heatmap", converting data +types and more. Pandas objects can also be renamed, duplicated, new +columns added, copied/pasted to/from the clipboard (as TSV), and +saved/loaded to/from a file. Spyder can also import data from a variety +of plain text and binary files or the clipboard into a new pandas +DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython +Console, and Spyder's +[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and +render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. 
+ +### [marimo](https://marimo.io) + +marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun: + +1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities. +2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes. +3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook. +4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns. +5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively. +6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory. + +## API + +### [pandas-datareader](https://github.com/pydata/pandas-datareader) + +`pandas-datareader` is a remote data access library for pandas +(PyPI:`pandas-datareader`). It is based on functionality that was +located in `pandas.io.data` and `pandas.io.wb` but was split off in +v0.19. See more in the pandas-datareader +docs: + +The following data feeds are available: + +- Google Finance +- Tiingo +- Morningstar +- IEX +- Robinhood +- Enigma +- Quandl +- FRED +- Fama/French +- World Bank +- OECD +- Eurostat +- TSP Fund Data +- Nasdaq Trader Symbol Definitions +- Stooq Index Data +- MOEX Data + +### [pandaSDMX](https://pandasdmx.readthedocs.io) + +pandaSDMX is a library to retrieve and acquire statistical data and +metadata disseminated in [SDMX](https://sdmx.org) 2.1, an +ISO-standard widely used by institutions such as statistics offices, +central banks, and international organisations. pandaSDMX can expose +datasets and related structural metadata including data flows, +code-lists, and data structure definitions as pandas Series or +MultiIndexed DataFrames. + +### [fredapi](https://github.com/mortada/fredapi) + +fredapi is a Python interface to the Federal Reserve Economic Data +(FRED) provided by the Federal Reserve +Bank of St. Louis. It works with both the FRED database and ALFRED +database that contains point-in-time data (i.e. historic data +revisions). fredapi provides a wrapper in Python to the FRED HTTP API, +and also provides several convenient methods for parsing and analyzing +point-in-time data from ALFRED. fredapi makes use of pandas and returns +data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. + +## Domain specific + +### [Geopandas](https://github.com/geopandas/geopandas) + +Geopandas extends pandas data objects to include geographic information +which support geometric operations. If your work entails maps and +geographical coordinates, and you love pandas, you should take a close +look at Geopandas. + +### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) + +gurobipy-pandas provides a convenient accessor API to connect pandas with +gurobipy. It enables users to more easily and efficiently build mathematical +optimization models from data stored in DataFrames and Series, and to read +solutions back directly as pandas objects. 
+ +### [staircase](https://github.com/staircase-dev/staircase) + +staircase is a data analysis package, built upon pandas and numpy, for modelling and +manipulation of mathematical step functions. It provides a rich variety of arithmetic +operations, relational operations, logical operations, statistical operations and +aggregations for step functions defined over real numbers, datetime and timedelta domains. + +### [xarray](https://github.com/pydata/xarray) + +xarray brings the labeled data power of pandas to the physical sciences +by providing N-dimensional variants of the core pandas data structures. +It aims to provide a pandas-like and pandas-compatible toolkit for +analytics on multi-dimensional arrays, rather than the tabular data for +which pandas excels. + +## IO + +### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas) + +NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly. + +It supports the following data types: + +- pandas data types +- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm) +- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/) + +The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema). + +Example: + +```python +import ntv_pandas as npd + +jsn = df.npd.to_json(table=False) # save df as a JSON-value (format Table Schema if table is True else format NTV ) +df = npd.read_json(jsn) # load a JSON-value as a `DataFrame` + +df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not +``` + +### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas) + +BCPandas provides high performance writes from pandas to Microsoft SQL Server, +far exceeding the performance of the native `df.to_sql` method. Internally, it uses +Microsoft's BCP utility, but the complexity is fully abstracted away from the end user. +Rigorously tested, it is a complete replacement for `df.to_sql`. + +### [Deltalake](https://pypi.org/project/deltalake) + +Deltalake python package lets you access tables stored in +[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or +JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert +any Delta table into Pandas dataframe. + +### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas) + +pandas-gbq provides high performance reads and writes to and from +[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0), +these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`. +Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq`, instead. + +### [ArcticDB](https://github.com/man-group/ArcticDB) + +ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/). + +#### ArcticDB Terminology + +ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components: + +- `Object Store` Collections of libraries. 
Used to separate logical environments from each other. Analogous to a database server. +- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database. +- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables. +- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object. + +#### Installation + +To install, simply run: + +```console +pip install arcticdb +``` + +To get started, we can import ArcticDB and instantiate it: + +```python +import arcticdb as adb +import numpy as np +import pandas as pd +# this will set up the storage using the local file system +arctic = adb.Arctic("lmdb://arcticdb_test") +``` + +> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\ +> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`. + +#### Library Setup + +ArcticDB is geared towards storing many (potentially millions) of tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must first be initialized prior to use: + +```python +lib = arctic.get_library('sample', create_if_missing=True) +``` + +#### Writing Data to ArcticDB + +Now we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage. + +```python +df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3) + } +) + +df +df.dtypes +``` + +Write to ArcticDB. + +```python +write_record = lib.write("test", df) +``` + +> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types: +> +> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index) +> - `RangeIndex` +> - `DatetimeIndex` +> - `MultiIndex` composed of above supported types +> +> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc'). + +#### Reading Data from ArcticDB + +Read the data back from storage: + +```python +read_record = lib.read("test") +read_record.data +df.dtypes +``` + +ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder). + +### [Hugging Face](https://huggingface.co/datasets) + +The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library. + +You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`. 
+ +For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb): + +```python +import pandas as pd + +# Load the IMDB dataset +df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet") +``` + +Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas. + +To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`: + +```python +# Save the dataset to my Hugging Face account +df.to_parquet("hf://datasets/username/dataset_name/train.parquet") +``` + +You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets). + +## Out-of-core + +### [Bodo](https://github.com/bodo-ai/Bodo) + +Bodo is a high-performance compute engine for Python data processing. +Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas +workloads from laptops to clusters without major code changes. +Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology—making it +both easier to use and often much faster than alternatives. +Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently. + +```python +import pandas as pd +import bodo + +@bodo.jit +def process_data(): + df = pd.read_parquet("my_data.pq") + df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)}) + df2.to_parquet("out.pq") + +process_data() +``` + +### [Cylon](https://cylondata.org/) + +Cylon is a fast, scalable, distributed memory parallel runtime with a pandas +like Python DataFrame API. ”Core Cylon” is implemented with C++ using Apache +Arrow format to represent the data in-memory. Cylon DataFrame API implements +most of the core operators of pandas such as merge, filter, join, concat, +group-by, drop_duplicates, etc. These operators are designed to work across +thousands of cores to scale applications. It can interoperate with pandas +DataFrame by reading data from pandas or converting data to pandas so users +can selectively scale parts of their pandas DataFrame applications. + +```python +from pycylon import read_csv, DataFrame, CylonEnv +from pycylon.net import MPIConfig + +# Initialize Cylon distributed environment +config: MPIConfig = MPIConfig() +env: CylonEnv = CylonEnv(config=config, distributed=True) + +df1: DataFrame = read_csv('/tmp/csv1.csv') +df2: DataFrame = read_csv('/tmp/csv2.csv') + +# Using 1000s of cores across the cluster to compute the join +df3: Table = df1.join(other=df2, on=[0], algorithm="hash", env=env) + +print(df3) +``` + +### [Dask](https://docs.dask.org) + +Dask is a flexible parallel computing library for analytics. Dask +provides a familiar `DataFrame` interface for out-of-core, parallel and +distributed computing. + +### [Dask-ML](https://ml.dask.org) + +Dask-ML enables parallel and distributed machine learning using Dask +alongside existing machine learning libraries like Scikit-Learn, +XGBoost, and TensorFlow. + +### [Ibis](https://ibis-project.org/docs/) + +Ibis offers a standard way to write analytics code, that can be run in multiple engines. 
It helps in bridging the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.). + +### [Koalas](https://koalas.readthedocs.io/en/latest/) + +Koalas provides a familiar pandas DataFrame interface on top of Apache +Spark. It enables users to leverage multi-cores on one machine or a +cluster of machines to speed up or scale their DataFrame code. + +### [Modin](https://github.com/modin-project/modin) + +The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement +for pandas. This means that you can use Modin with existing pandas code or write +new code with the existing pandas API. Modin can leverage your entire machine or +cluster to speed up and scale your pandas workloads, including traditionally +time-consuming tasks like ingesting data (`read_csv`, `read_excel`, +`read_parquet`, etc.). + +```python +# import pandas as pd +import modin.pandas as pd + +df = pd.read_csv("big.csv") # use all your cores! +``` + +### [Pandarallel](https://github.com/nalepae/pandarallel) + +Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code. +It also displays progress bars. + +```python +from pandarallel import pandarallel + +pandarallel.initialize(progress_bar=True) + +# df.apply(func) +df.parallel_apply(func) +``` + +### [Vaex](https://vaex.io/docs/) + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. Vaex is +a python library for Out-of-Core DataFrames (similar to Pandas), to +visualize and explore big tabular datasets. It can calculate statistics +such as mean, sum, count, standard deviation etc, on an N-dimensional +grid up to a billion (10^9) objects/rows per second. Visualization is +done using histograms, density plots and 3d volume rendering, allowing +interactive exploration of big data. Vaex uses memory mapping, zero +memory copy policy and lazy computations for best performance (no memory +wasted). + +- `vaex.from_pandas` +- `vaex.to_pandas_df` + +### [Hail Query](https://hail.is/) + +An out-of-core, preemptible-safe, distributed, dataframe library serving +the genetics community. Hail Query ships with on-disk data formats, +in-memory data formats, an expression compiler, a query planner, and a +distributed sort algorithm all designed to accelerate queries on large +matrices of genome sequencing data. + +It is often easiest to use pandas to manipulate the summary statistics or +other small aggregates produced by Hail. For this reason, Hail provides +native import to and export from pandas DataFrames: + +- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas) +- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas) + +## Data cleaning and validation + +### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor) + +Pyjanitor provides a clean API for cleaning data, using method chaining. + +### [Pandera](https://pandera.readthedocs.io/en/stable/) + +Pandera provides a flexible and expressive API for performing data validation on dataframes +to make data processing pipelines more readable and robust. +Dataframes contain information that pandera explicitly validates at runtime. This is useful in +production-critical data pipelines or reproducible research settings. 
+ +## Extension data types + +Pandas provides an interface for defining +[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system. +The following libraries implement that interface to provide types not found in NumPy or pandas, +which work well with pandas' data containers. + +### [awkward-pandas](https://github.com/scikit-hep/awkward) + +Awkward-pandas provides an extension type for storing Awkward +Arrays inside pandas' Series and +DataFrame. It also provides an accessor for using awkward functions +on Series that are of awkward type. + +### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas) + +db-dtypes provides an extension types for working with types like +DATE, TIME, and JSON from database systems. This package is used +by pandas-gbq to provide natural dtypes for BigQuery data types without +a natural numpy type. + +### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/) + +Pandas-Genomics provides an extension type and extension array for working +with genomics data. It also includes `genomics` accessors for many useful properties +and methods related to QC and analysis of genomics data. + +### [Physipandas](https://github.com/mocquin/physipandas) + +Physipandas provides an extension for manipulating physical quantities +(like scalar and numpy.ndarray) in association with a physical unit +(like meter or joule) and additional features for integration of +`physipy` accessors with pandas Series and Dataframe. + +### [Pint-Pandas](https://github.com/hgrecco/pint-pandas) + +Pint-Pandas provides an extension type for storing numeric arrays with units. +These arrays can be stored inside pandas' Series and DataFrame. Operations +between Series and DataFrame columns which use pint's extension array are then +units aware. + +### [Text Extensions](https://ibm.biz/text-extensions-for-pandas) + +Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames. + +## Accessors + +A directory of projects providing +[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). +This is for users to discover new accessors and for library +authors to coordinate on the namespace. 
+ +| Library | Accessor | Classes | +| -------------------------------------------------------------------- | ---------- | --------------------- | +| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` | +| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | +| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` | +| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | +| [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` | +| [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` | +| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` | +| [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` | +| [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` | + +## Development tools + +### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs) + +While pandas repository is partially typed, the package itself doesn't expose this information for external use. +Install pandas-stubs to enable basic type coverage of pandas API. + +Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468), +[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142). + +See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs). + +### [Hamilton](https://github.com/dagworks-inc/hamilton) + +Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a +Pandas code base, specifically with respect to feature engineering for machine learning models. + +It prescribes an opinionated paradigm, that ensures all code is: + +- unit testable +- integration testing friendly +- documentation friendly +- transformation logic is reusable, as it is decoupled from the context of where it is used. +- integratable with runtime data quality checks. + +This helps one to scale your pandas code base, at the same time, keeping maintenance costs low. + +For more information, see [documentation](https://hamilton.readthedocs.io/). From f39f37ba60fccbd5f51953441340ce38ee2f29a2 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:57 -0500 Subject: [PATCH 116/184] New translations ecosystem.md (Korean) --- web/pandas/ko/community/ecosystem.md | 738 +++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 web/pandas/ko/community/ecosystem.md diff --git a/web/pandas/ko/community/ecosystem.md b/web/pandas/ko/community/ecosystem.md new file mode 100644 index 000000000..f8e721d83 --- /dev/null +++ b/web/pandas/ko/community/ecosystem.md @@ -0,0 +1,738 @@ +# Ecosystem + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. The creation +of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a community-maintained list of projects that build on pandas in order +to provide tools in the PyData space. 
The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library. + +For a more complete list of projects that depend on pandas, see the libraries.io usage page for +pandas or search pypi for +pandas. + +We'd like to make it easier for users to find these projects, if you +know of other substantial projects that you feel should be on this list, +please let us know. + +## Statistics and machine learning + +### [Statsmodels](https://www.statsmodels.org/) + +Statsmodels is the prominent Python "statistics and econometrics +library" and it has a long-standing special relationship with pandas. +Statsmodels provides powerful statistics, econometrics, analysis and +modeling functionality that is out of pandas' scope. Statsmodels +leverages pandas objects as the underlying data container for +computation. + +### [skrub](https://skrub-data.org) + +Skrub facilitates machine learning on dataframes. It bridges pandas +to scikit-learn and related. In particular it facilitates building +features from dataframes. + +### [Featuretools](https://github.com/alteryx/featuretools/) + +Featuretools is a Python library for automated feature engineering built +on top of pandas. It excels at transforming temporal and relational +datasets into feature matrices for machine learning using reusable +feature engineering "primitives". Users can contribute their own +primitives in Python and share them with the rest of the community. + +### [Compose](https://github.com/alteryx/compose) + +Compose is a machine learning tool for labeling data and prediction engineering. +It allows you to structure the labeling process by parameterizing +prediction problems and transforming time-driven relational data into +target values with cutoff times that can be used for supervised learning. + +### [STUMPY](https://github.com/TDAmeritrade/stumpy) + +STUMPY is a powerful and scalable Python library for modern time series analysis. +At its core, STUMPY efficiently computes something called a +[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html), +which can be used for a wide variety of time series data mining tasks. + +## Visualization + +### [Altair](https://altair-viz.github.io/) + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with Pandas DataFrames. + +### [Bokeh](https://docs.bokeh.org) + +Bokeh is a Python interactive visualization library for large datasets +that natively uses the latest web technologies. Its goal is to provide +elegant, concise construction of novel graphics in the style of +Protovis/D3, while delivering high-performance interactivity over large +data to thin clients. + +[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a +high level API for Bokeh that can be loaded as a native Pandas plotting +backend via + +``` +pd.set_option("plotting.backend", "pandas_bokeh") +``` + +It is very similar to the matplotlib plotting backend, but provides +interactive web-based charts and maps. 
+ +### [pygwalker](https://github.com/Kanaries/pygwalker) + +PyGWalker is an interactive data visualization and +exploratory data analysis tool built upon Graphic Walker +with support for visualization, cleaning, and annotation workflows. + +pygwalker can save interactively created charts +to Graphic-Walker and Vega-Lite JSON. + +``` +import pygwalker as pyg +pyg.walk(df) +``` + +### [seaborn](https://seaborn.pydata.org) + +Seaborn is a Python visualization library based on +[matplotlib](https://matplotlib.org). It provides a high-level, +dataset-oriented interface for creating attractive statistical graphics. +The plotting functions in seaborn understand pandas objects and leverage +pandas grouping operations internally to support concise specification +of complex visualizations. Seaborn also goes beyond matplotlib and +pandas with the option to perform statistical estimation while plotting, +aggregating across observations and visualizing the fit of statistical +models to emphasize patterns in a dataset. + +``` +import seaborn as sns +sns.set_theme() +``` + +### [plotnine](https://github.com/has2k1/plotnine/) + +Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a +foundational exploratory visualization package for the R language. Based +on "The Grammar of +Graphics" +it provides a powerful, declarative and extremely general way to +generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/). + +### [IPython Vega](https://github.com/vega/ipyvega) + +[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook. + +### [Plotly](https://plot.ly/python) + +[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/) +enables interactive figures and web shareability. Maps, 2D, 3D, and +live-streaming graphs are rendered with WebGL and +[D3.js](https://d3js.org/). The library supports plotting directly from +a pandas DataFrame and cloud-based collaboration. Users of matplotlib, +ggplot for Python, and +Seaborn can +convert figures into interactive web-based plots. Plots can be drawn in +[IPython Notebooks](https://plot.ly/ipython-notebooks/) , edited with R +or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly +is free for unlimited sharing, and has cloud, offline, or on-premise +accounts for private use. + +### [Lux](https://github.com/lux-org/lux) + +Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas: + +```python +import lux +import pandas as pd + +df = pd.read_csv("data.csv") +df # discover interesting insights! +``` + +By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allow users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. 
+ +### [D-Tale](https://github.com/man-group/dtale) + +D-Tale is a lightweight web client for visualizing pandas data structures. It +provides a rich spreadsheet-style grid which acts as a wrapper for a lot of +pandas functionality (query, sort, describe, corr...) so users can quickly +manipulate their data. There is also an interactive chart-builder using Plotly +Dash allowing users to build nice portable visualizations. D-Tale can be +invoked with the following command + +```python +import dtale + +dtale.show(df) +``` + +D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle +& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1). + +### [hvplot](https://hvplot.holoviz.org/index.html) + +hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/). +It can be loaded as a native pandas plotting backend via + +```python +pd.set_option("plotting.backend", "hvplot") +``` + +## IDE + +### [IPython](https://ipython.org/documentation.html) + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with Pandas methods and also +attributes like DataFrame columns. + +### [Jupyter Notebook / Jupyter Lab](https://jupyter.org) + +Jupyter Notebook is a web application for creating Jupyter notebooks. A +Jupyter notebook is a JSON document containing an ordered list of +input/output cells which can contain code, text, mathematics, plots and +rich media. Jupyter notebooks can be converted to a number of open +standard output formats (HTML, HTML presentation slides, LaTeX, PDF, +ReStructuredText, Markdown, Python) through 'Download As' in the web +interface and `jupyter convert` in a shell. + +Pandas DataFrames implement `_repr_html_` and `_repr_latex` methods which +are utilized by Jupyter Notebook for displaying (abbreviated) HTML or +LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may +or may not be compatible with non-HTML Jupyter output formats.) + +See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html) +for pandas `display.` settings. + +### [Spyder](https://www.spyder-ide.org/) + +Spyder is a cross-platform PyQt-based IDE combining the editing, +analysis, debugging and profiling functionality of a software +development tool with the data exploration, interactive execution, deep +inspection and rich visualization capabilities of a scientific +environment like MATLAB or Rstudio. + +Its Variable +Explorer allows +users to view, manipulate and edit pandas `Index`, `Series`, and +`DataFrame` objects like a "spreadsheet", including copying and +modifying values, sorting, displaying a "heatmap", converting data +types and more. Pandas objects can also be renamed, duplicated, new +columns added, copied/pasted to/from the clipboard (as TSV), and +saved/loaded to/from a file. Spyder can also import data from a variety +of plain text and binary files or the clipboard into a new pandas +DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython +Console, and Spyder's +[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and +render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. 
+ +### [marimo](https://marimo.io) + +marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun: + +1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities. +2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes. +3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook. +4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns. +5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively. +6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory. + +## API + +### [pandas-datareader](https://github.com/pydata/pandas-datareader) + +`pandas-datareader` is a remote data access library for pandas +(PyPI:`pandas-datareader`). It is based on functionality that was +located in `pandas.io.data` and `pandas.io.wb` but was split off in +v0.19. See more in the pandas-datareader +docs: + +The following data feeds are available: + +- Google Finance +- Tiingo +- Morningstar +- IEX +- Robinhood +- Enigma +- Quandl +- FRED +- Fama/French +- World Bank +- OECD +- Eurostat +- TSP Fund Data +- Nasdaq Trader Symbol Definitions +- Stooq Index Data +- MOEX Data + +### [pandaSDMX](https://pandasdmx.readthedocs.io) + +pandaSDMX is a library to retrieve and acquire statistical data and +metadata disseminated in [SDMX](https://sdmx.org) 2.1, an +ISO-standard widely used by institutions such as statistics offices, +central banks, and international organisations. pandaSDMX can expose +datasets and related structural metadata including data flows, +code-lists, and data structure definitions as pandas Series or +MultiIndexed DataFrames. + +### [fredapi](https://github.com/mortada/fredapi) + +fredapi is a Python interface to the Federal Reserve Economic Data +(FRED) provided by the Federal Reserve +Bank of St. Louis. It works with both the FRED database and ALFRED +database that contains point-in-time data (i.e. historic data +revisions). fredapi provides a wrapper in Python to the FRED HTTP API, +and also provides several convenient methods for parsing and analyzing +point-in-time data from ALFRED. fredapi makes use of pandas and returns +data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. + +## Domain specific + +### [Geopandas](https://github.com/geopandas/geopandas) + +Geopandas extends pandas data objects to include geographic information +which support geometric operations. If your work entails maps and +geographical coordinates, and you love pandas, you should take a close +look at Geopandas. + +### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) + +gurobipy-pandas provides a convenient accessor API to connect pandas with +gurobipy. It enables users to more easily and efficiently build mathematical +optimization models from data stored in DataFrames and Series, and to read +solutions back directly as pandas objects. 
+ +### [staircase](https://github.com/staircase-dev/staircase) + +staircase is a data analysis package, built upon pandas and numpy, for modelling and +manipulation of mathematical step functions. It provides a rich variety of arithmetic +operations, relational operations, logical operations, statistical operations and +aggregations for step functions defined over real numbers, datetime and timedelta domains. + +### [xarray](https://github.com/pydata/xarray) + +xarray brings the labeled data power of pandas to the physical sciences +by providing N-dimensional variants of the core pandas data structures. +It aims to provide a pandas-like and pandas-compatible toolkit for +analytics on multi-dimensional arrays, rather than the tabular data for +which pandas excels. + +## IO + +### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas) + +NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly. + +It supports the following data types: + +- pandas data types +- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm) +- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/) + +The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema). + +Example: + +```python +import ntv_pandas as npd + +jsn = df.npd.to_json(table=False) # save df as a JSON-value (format Table Schema if table is True else format NTV ) +df = npd.read_json(jsn) # load a JSON-value as a `DataFrame` + +df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not +``` + +### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas) + +BCPandas provides high performance writes from pandas to Microsoft SQL Server, +far exceeding the performance of the native `df.to_sql` method. Internally, it uses +Microsoft's BCP utility, but the complexity is fully abstracted away from the end user. +Rigorously tested, it is a complete replacement for `df.to_sql`. + +### [Deltalake](https://pypi.org/project/deltalake) + +Deltalake python package lets you access tables stored in +[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or +JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert +any Delta table into Pandas dataframe. + +### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas) + +pandas-gbq provides high performance reads and writes to and from +[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0), +these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`. +Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq`, instead. + +### [ArcticDB](https://github.com/man-group/ArcticDB) + +ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/). + +#### ArcticDB Terminology + +ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components: + +- `Object Store` Collections of libraries. 
Used to separate logical environments from each other. Analogous to a database server. +- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database. +- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables. +- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object. + +#### Installation + +To install, simply run: + +```console +pip install arcticdb +``` + +To get started, we can import ArcticDB and instantiate it: + +```python +import arcticdb as adb +import numpy as np +import pandas as pd +# this will set up the storage using the local file system +arctic = adb.Arctic("lmdb://arcticdb_test") +``` + +> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\ +> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`. + +#### Library Setup + +ArcticDB is geared towards storing many (potentially millions) of tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must first be initialized prior to use: + +```python +lib = arctic.get_library('sample', create_if_missing=True) +``` + +#### Writing Data to ArcticDB + +Now we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage. + +```python +df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3) + } +) + +df +df.dtypes +``` + +Write to ArcticDB. + +```python +write_record = lib.write("test", df) +``` + +> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types: +> +> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index) +> - `RangeIndex` +> - `DatetimeIndex` +> - `MultiIndex` composed of above supported types +> +> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc'). + +#### Reading Data from ArcticDB + +Read the data back from storage: + +```python +read_record = lib.read("test") +read_record.data +df.dtypes +``` + +ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder). + +### [Hugging Face](https://huggingface.co/datasets) + +The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library. + +You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`. 
+ +For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb): + +```python +import pandas as pd + +# Load the IMDB dataset +df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet") +``` + +Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas. + +To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`: + +```python +# Save the dataset to my Hugging Face account +df.to_parquet("hf://datasets/username/dataset_name/train.parquet") +``` + +You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets). + +## Out-of-core + +### [Bodo](https://github.com/bodo-ai/Bodo) + +Bodo is a high-performance compute engine for Python data processing. +Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas +workloads from laptops to clusters without major code changes. +Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology—making it +both easier to use and often much faster than alternatives. +Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently. + +```python +import pandas as pd +import bodo + +@bodo.jit +def process_data(): + df = pd.read_parquet("my_data.pq") + df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)}) + df2.to_parquet("out.pq") + +process_data() +``` + +### [Cylon](https://cylondata.org/) + +Cylon is a fast, scalable, distributed memory parallel runtime with a pandas +like Python DataFrame API. ”Core Cylon” is implemented with C++ using Apache +Arrow format to represent the data in-memory. Cylon DataFrame API implements +most of the core operators of pandas such as merge, filter, join, concat, +group-by, drop_duplicates, etc. These operators are designed to work across +thousands of cores to scale applications. It can interoperate with pandas +DataFrame by reading data from pandas or converting data to pandas so users +can selectively scale parts of their pandas DataFrame applications. + +```python +from pycylon import read_csv, DataFrame, CylonEnv +from pycylon.net import MPIConfig + +# Initialize Cylon distributed environment +config: MPIConfig = MPIConfig() +env: CylonEnv = CylonEnv(config=config, distributed=True) + +df1: DataFrame = read_csv('/tmp/csv1.csv') +df2: DataFrame = read_csv('/tmp/csv2.csv') + +# Using 1000s of cores across the cluster to compute the join +df3: Table = df1.join(other=df2, on=[0], algorithm="hash", env=env) + +print(df3) +``` + +### [Dask](https://docs.dask.org) + +Dask is a flexible parallel computing library for analytics. Dask +provides a familiar `DataFrame` interface for out-of-core, parallel and +distributed computing. + +### [Dask-ML](https://ml.dask.org) + +Dask-ML enables parallel and distributed machine learning using Dask +alongside existing machine learning libraries like Scikit-Learn, +XGBoost, and TensorFlow. + +### [Ibis](https://ibis-project.org/docs/) + +Ibis offers a standard way to write analytics code, that can be run in multiple engines. 
It helps in bridging the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.). + +### [Koalas](https://koalas.readthedocs.io/en/latest/) + +Koalas provides a familiar pandas DataFrame interface on top of Apache +Spark. It enables users to leverage multi-cores on one machine or a +cluster of machines to speed up or scale their DataFrame code. + +### [Modin](https://github.com/modin-project/modin) + +The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement +for pandas. This means that you can use Modin with existing pandas code or write +new code with the existing pandas API. Modin can leverage your entire machine or +cluster to speed up and scale your pandas workloads, including traditionally +time-consuming tasks like ingesting data (`read_csv`, `read_excel`, +`read_parquet`, etc.). + +```python +# import pandas as pd +import modin.pandas as pd + +df = pd.read_csv("big.csv") # use all your cores! +``` + +### [Pandarallel](https://github.com/nalepae/pandarallel) + +Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code. +It also displays progress bars. + +```python +from pandarallel import pandarallel + +pandarallel.initialize(progress_bar=True) + +# df.apply(func) +df.parallel_apply(func) +``` + +### [Vaex](https://vaex.io/docs/) + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. Vaex is +a python library for Out-of-Core DataFrames (similar to Pandas), to +visualize and explore big tabular datasets. It can calculate statistics +such as mean, sum, count, standard deviation etc, on an N-dimensional +grid up to a billion (10^9) objects/rows per second. Visualization is +done using histograms, density plots and 3d volume rendering, allowing +interactive exploration of big data. Vaex uses memory mapping, zero +memory copy policy and lazy computations for best performance (no memory +wasted). + +- `vaex.from_pandas` +- `vaex.to_pandas_df` + +### [Hail Query](https://hail.is/) + +An out-of-core, preemptible-safe, distributed, dataframe library serving +the genetics community. Hail Query ships with on-disk data formats, +in-memory data formats, an expression compiler, a query planner, and a +distributed sort algorithm all designed to accelerate queries on large +matrices of genome sequencing data. + +It is often easiest to use pandas to manipulate the summary statistics or +other small aggregates produced by Hail. For this reason, Hail provides +native import to and export from pandas DataFrames: + +- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas) +- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas) + +## Data cleaning and validation + +### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor) + +Pyjanitor provides a clean API for cleaning data, using method chaining. + +### [Pandera](https://pandera.readthedocs.io/en/stable/) + +Pandera provides a flexible and expressive API for performing data validation on dataframes +to make data processing pipelines more readable and robust. +Dataframes contain information that pandera explicitly validates at runtime. This is useful in +production-critical data pipelines or reproducible research settings. 
+ +## Extension data types + +Pandas provides an interface for defining +[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system. +The following libraries implement that interface to provide types not found in NumPy or pandas, +which work well with pandas' data containers. + +### [awkward-pandas](https://github.com/scikit-hep/awkward) + +Awkward-pandas provides an extension type for storing Awkward +Arrays inside pandas' Series and +DataFrame. It also provides an accessor for using awkward functions +on Series that are of awkward type. + +### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas) + +db-dtypes provides an extension types for working with types like +DATE, TIME, and JSON from database systems. This package is used +by pandas-gbq to provide natural dtypes for BigQuery data types without +a natural numpy type. + +### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/) + +Pandas-Genomics provides an extension type and extension array for working +with genomics data. It also includes `genomics` accessors for many useful properties +and methods related to QC and analysis of genomics data. + +### [Physipandas](https://github.com/mocquin/physipandas) + +Physipandas provides an extension for manipulating physical quantities +(like scalar and numpy.ndarray) in association with a physical unit +(like meter or joule) and additional features for integration of +`physipy` accessors with pandas Series and Dataframe. + +### [Pint-Pandas](https://github.com/hgrecco/pint-pandas) + +Pint-Pandas provides an extension type for storing numeric arrays with units. +These arrays can be stored inside pandas' Series and DataFrame. Operations +between Series and DataFrame columns which use pint's extension array are then +units aware. + +### [Text Extensions](https://ibm.biz/text-extensions-for-pandas) + +Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames. + +## Accessors + +A directory of projects providing +[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). +This is for users to discover new accessors and for library +authors to coordinate on the namespace. 
+ +| Library | Accessor | Classes | +| -------------------------------------------------------------------- | ---------- | --------------------- | +| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` | +| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | +| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` | +| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | +| [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` | +| [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` | +| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` | +| [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` | +| [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` | + +## Development tools + +### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs) + +While pandas repository is partially typed, the package itself doesn't expose this information for external use. +Install pandas-stubs to enable basic type coverage of pandas API. + +Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468), +[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142). + +See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs). + +### [Hamilton](https://github.com/dagworks-inc/hamilton) + +Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a +Pandas code base, specifically with respect to feature engineering for machine learning models. + +It prescribes an opinionated paradigm, that ensures all code is: + +- unit testable +- integration testing friendly +- documentation friendly +- transformation logic is reusable, as it is decoupled from the context of where it is used. +- integratable with runtime data quality checks. + +This helps one to scale your pandas code base, at the same time, keeping maintenance costs low. + +For more information, see [documentation](https://hamilton.readthedocs.io/). From 5d91dcb9b34c4c075809e6bafc1ab3c93a843478 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:08:59 -0500 Subject: [PATCH 117/184] New translations ecosystem.md (Polish) --- web/pandas/pl/community/ecosystem.md | 738 +++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 web/pandas/pl/community/ecosystem.md diff --git a/web/pandas/pl/community/ecosystem.md b/web/pandas/pl/community/ecosystem.md new file mode 100644 index 000000000..f8e721d83 --- /dev/null +++ b/web/pandas/pl/community/ecosystem.md @@ -0,0 +1,738 @@ +# Ecosystem + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. The creation +of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a community-maintained list of projects that build on pandas in order +to provide tools in the PyData space. 
The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library.
+
+For a more complete list of projects that depend on pandas, see the libraries.io usage page for
+pandas or search pypi for
+pandas.
+
+We'd like to make it easier for users to find these projects. If you
+know of other substantial projects that you feel should be on this list,
+please let us know.
+
+## Statistics and machine learning
+
+### [Statsmodels](https://www.statsmodels.org/)
+
+Statsmodels is the prominent Python "statistics and econometrics
+library" and it has a long-standing special relationship with pandas.
+Statsmodels provides powerful statistics, econometrics, analysis and
+modeling functionality that is out of pandas' scope. Statsmodels
+leverages pandas objects as the underlying data container for
+computation.
+
+### [skrub](https://skrub-data.org)
+
+Skrub facilitates machine learning on dataframes. It bridges pandas
+to scikit-learn and related libraries. In particular, it facilitates building
+features from dataframes.
+
+### [Featuretools](https://github.com/alteryx/featuretools/)
+
+Featuretools is a Python library for automated feature engineering built
+on top of pandas. It excels at transforming temporal and relational
+datasets into feature matrices for machine learning using reusable
+feature engineering "primitives". Users can contribute their own
+primitives in Python and share them with the rest of the community.
+
+### [Compose](https://github.com/alteryx/compose)
+
+Compose is a machine learning tool for labeling data and prediction engineering.
+It allows you to structure the labeling process by parameterizing
+prediction problems and transforming time-driven relational data into
+target values with cutoff times that can be used for supervised learning.
+
+### [STUMPY](https://github.com/TDAmeritrade/stumpy)
+
+STUMPY is a powerful and scalable Python library for modern time series analysis.
+At its core, STUMPY efficiently computes something called a
+[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html),
+which can be used for a wide variety of time series data mining tasks.
+
+## Visualization
+
+### [Altair](https://altair-viz.github.io/)
+
+Altair is a declarative statistical visualization library for Python.
+With Altair, you can spend more time understanding your data and its
+meaning. Altair's API is simple, friendly and consistent and built on
+top of the powerful Vega-Lite JSON specification. This elegant
+simplicity produces beautiful and effective visualizations with a
+minimal amount of code. Altair works with Pandas DataFrames.
+
+### [Bokeh](https://docs.bokeh.org)
+
+Bokeh is a Python interactive visualization library for large datasets
+that natively uses the latest web technologies. Its goal is to provide
+elegant, concise construction of novel graphics in the style of
+Protovis/D3, while delivering high-performance interactivity over large
+data to thin clients.
+
+[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a
+high-level API for Bokeh that can be loaded as a native Pandas plotting
+backend via

+```
+pd.set_option("plotting.backend", "pandas_bokeh")
+```
+
+It is very similar to the matplotlib plotting backend, but provides
+interactive web-based charts and maps. 
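+
+As a minimal sketch of what this enables (assuming `pandas_bokeh` is
+installed; the DataFrame and column names are illustrative), the regular
+pandas plotting API then returns interactive Bokeh charts:
+
+```python
+import pandas as pd
+import pandas_bokeh  # makes the "pandas_bokeh" plotting backend available
+
+pd.set_option("plotting.backend", "pandas_bokeh")
+
+df = pd.DataFrame({"year": [2020, 2021, 2022], "sales": [10, 30, 20]})
+df.plot(kind="line")  # renders an interactive Bokeh chart instead of a matplotlib figure
+```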
+
+### [pygwalker](https://github.com/Kanaries/pygwalker)
+
+PyGWalker is an interactive data visualization and
+exploratory data analysis tool built upon Graphic Walker
+with support for visualization, cleaning, and annotation workflows.
+
+pygwalker can save interactively created charts
+to Graphic-Walker and Vega-Lite JSON.
+
+```
+import pygwalker as pyg
+pyg.walk(df)
+```
+
+### [seaborn](https://seaborn.pydata.org)
+
+Seaborn is a Python visualization library based on
+[matplotlib](https://matplotlib.org). It provides a high-level,
+dataset-oriented interface for creating attractive statistical graphics.
+The plotting functions in seaborn understand pandas objects and leverage
+pandas grouping operations internally to support concise specification
+of complex visualizations. Seaborn also goes beyond matplotlib and
+pandas with the option to perform statistical estimation while plotting,
+aggregating across observations and visualizing the fit of statistical
+models to emphasize patterns in a dataset.
+
+```
+import seaborn as sns
+sns.set_theme()
+```
+
+### [plotnine](https://github.com/has2k1/plotnine/)
+
+Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a
+foundational exploratory visualization package for the R language. Based
+on "The Grammar of
+Graphics",
+it provides a powerful, declarative and extremely general way to
+generate bespoke plots of any kind of data.
+Implementations for various other languages are available.
+A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/).
+
+### [IPython Vega](https://github.com/vega/ipyvega)
+
+[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook.
+
+### [Plotly](https://plot.ly/python)
+
+[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/)
+enables interactive figures and web shareability. Maps, 2D, 3D, and
+live-streaming graphs are rendered with WebGL and
+[D3.js](https://d3js.org/). The library supports plotting directly from
+a pandas DataFrame and cloud-based collaboration. Users of matplotlib,
+ggplot for Python, and
+Seaborn can
+convert figures into interactive web-based plots. Plots can be drawn in
+[IPython Notebooks](https://plot.ly/ipython-notebooks/), edited with R
+or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly
+is free for unlimited sharing, and has cloud, offline, or on-premise
+accounts for private use.
+
+### [Lux](https://github.com/lux-org/lux)
+
+Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas:
+
+```python
+import lux
+import pandas as pd
+
+df = pd.read_csv("data.csv")
+df # discover interesting insights!
+```
+
+By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allows users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. 
+
+### [D-Tale](https://github.com/man-group/dtale)
+
+D-Tale is a lightweight web client for visualizing pandas data structures. It
+provides a rich spreadsheet-style grid which acts as a wrapper for a lot of
+pandas functionality (query, sort, describe, corr...) so users can quickly
+manipulate their data. There is also an interactive chart-builder using Plotly
+Dash allowing users to build nice portable visualizations. D-Tale can be
+invoked with the following command:
+
+```python
+import dtale
+
+dtale.show(df)
+```
+
+D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle
+& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1).
+
+### [hvplot](https://hvplot.holoviz.org/index.html)
+
+hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/).
+It can be loaded as a native pandas plotting backend via
+
+```python
+pd.set_option("plotting.backend", "hvplot")
+```
+
+## IDE
+
+### [IPython](https://ipython.org/documentation.html)
+
+IPython is an interactive command shell and distributed computing
+environment. IPython tab completion works with Pandas methods and also
+attributes like DataFrame columns.
+
+### [Jupyter Notebook / Jupyter Lab](https://jupyter.org)
+
+Jupyter Notebook is a web application for creating Jupyter notebooks. A
+Jupyter notebook is a JSON document containing an ordered list of
+input/output cells which can contain code, text, mathematics, plots and
+rich media. Jupyter notebooks can be converted to a number of open
+standard output formats (HTML, HTML presentation slides, LaTeX, PDF,
+ReStructuredText, Markdown, Python) through 'Download As' in the web
+interface and `jupyter nbconvert` in a shell.
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
+
+See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html)
+for pandas `display.` settings.
+
+### [Spyder](https://www.spyder-ide.org/)
+
+Spyder is a cross-platform PyQt-based IDE combining the editing,
+analysis, debugging and profiling functionality of a software
+development tool with the data exploration, interactive execution, deep
+inspection and rich visualization capabilities of a scientific
+environment like MATLAB or RStudio.
+
+Its Variable
+Explorer allows
+users to view, manipulate and edit pandas `Index`, `Series`, and
+`DataFrame` objects like a "spreadsheet", including copying and
+modifying values, sorting, displaying a "heatmap", converting data
+types and more. Pandas objects can also be renamed, duplicated, new
+columns added, copied/pasted to/from the clipboard (as TSV), and
+saved/loaded to/from a file. Spyder can also import data from a variety
+of plain text and binary files or the clipboard into a new pandas
+DataFrame via a sophisticated import wizard.
+
+Most pandas classes, methods and data attributes can be autocompleted in
+Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython
+Console, and Spyder's
+[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and
+render Numpydoc documentation on pandas objects in rich text with Sphinx
+both automatically and on-demand. 
+
+### [marimo](https://marimo.io)
+
+marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun:
+
+1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities.
+2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes.
+3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook.
+4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns.
+5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively.
+6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory.
+
+## API
+
+### [pandas-datareader](https://github.com/pydata/pandas-datareader)
+
+`pandas-datareader` is a remote data access library for pandas
+(PyPI:`pandas-datareader`). It is based on functionality that was
+located in `pandas.io.data` and `pandas.io.wb` but was split off in
+v0.19. See more in the pandas-datareader
+docs.
+
+The following data feeds are available:
+
+- Google Finance
+- Tiingo
+- Morningstar
+- IEX
+- Robinhood
+- Enigma
+- Quandl
+- FRED
+- Fama/French
+- World Bank
+- OECD
+- Eurostat
+- TSP Fund Data
+- Nasdaq Trader Symbol Definitions
+- Stooq Index Data
+- MOEX Data
+
+### [pandaSDMX](https://pandasdmx.readthedocs.io)
+
+pandaSDMX is a library to retrieve and acquire statistical data and
+metadata disseminated in [SDMX](https://sdmx.org) 2.1, an
+ISO-standard widely used by institutions such as statistics offices,
+central banks, and international organisations. pandaSDMX can expose
+datasets and related structural metadata including data flows,
+code-lists, and data structure definitions as pandas Series or
+MultiIndexed DataFrames.
+
+### [fredapi](https://github.com/mortada/fredapi)
+
+fredapi is a Python interface to the Federal Reserve Economic Data
+(FRED) provided by the Federal Reserve
+Bank of St. Louis. It works with both the FRED database and the ALFRED
+database, which contains point-in-time data (i.e. historic data
+revisions). fredapi provides a wrapper in Python to the FRED HTTP API,
+and also provides several convenient methods for parsing and analyzing
+point-in-time data from ALFRED. fredapi makes use of pandas and returns
+data in a Series or DataFrame. This module requires a FRED API key that
+you can obtain for free on the FRED website.
+
+## Domain specific
+
+### [Geopandas](https://github.com/geopandas/geopandas)
+
+Geopandas extends pandas data objects to include geographic information
+which supports geometric operations. If your work entails maps and
+geographical coordinates, and you love pandas, you should take a close
+look at Geopandas.
+
+### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)
+
+gurobipy-pandas provides a convenient accessor API to connect pandas with
+gurobipy. It enables users to more easily and efficiently build mathematical
+optimization models from data stored in DataFrames and Series, and to read
+solutions back directly as pandas objects. 
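+
+A minimal sketch of that accessor-style workflow (the model and data below
+are illustrative, not taken from the gurobipy-pandas documentation):
+
+```python
+import gurobipy as gp
+import gurobipy_pandas as gppd
+import pandas as pd
+
+data = pd.DataFrame({"cost": [1.0, 2.0, 3.0]})
+model = gp.Model()
+
+# One decision variable per DataFrame row, returned as a pandas Series
+x = gppd.add_vars(model, data, name="x")
+
+model.setObjective((data["cost"] * x).sum(), sense=gp.GRB.MINIMIZE)
+model.optimize()
+
+solution = x.gppd.X  # solution values come back as a pandas Series
+```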
+
+### [staircase](https://github.com/staircase-dev/staircase)
+
+staircase is a data analysis package, built upon pandas and numpy, for modelling and
+manipulation of mathematical step functions. It provides a rich variety of arithmetic
+operations, relational operations, logical operations, statistical operations and
+aggregations for step functions defined over real numbers, datetime and timedelta domains.
+
+### [xarray](https://github.com/pydata/xarray)
+
+xarray brings the labeled data power of pandas to the physical sciences
+by providing N-dimensional variants of the core pandas data structures.
+It aims to provide a pandas-like and pandas-compatible toolkit for
+analytics on multi-dimensional arrays, rather than the tabular data for
+which pandas excels.
+
+## IO
+
+### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas)
+
+NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly.
+
+It supports the following data types:
+
+- pandas data types
+- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm)
+- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/)
+
+The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema).
+
+Example:
+
+```python
+import ntv_pandas as npd
+
+jsn = df.npd.to_json(table=False)  # save df as a JSON value (Table Schema format if table is True, else NTV format)
+df = npd.read_json(jsn)  # load a JSON value as a `DataFrame`
+
+df.equals(npd.read_json(df.npd.to_json()))  # `True` in either case, whether `table=True` or not
+```
+
+### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas)
+
+BCPandas provides high performance writes from pandas to Microsoft SQL Server,
+far exceeding the performance of the native `df.to_sql` method. Internally, it uses
+Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
+Rigorously tested, it is a complete replacement for `df.to_sql`.
+
+### [Deltalake](https://pypi.org/project/deltalake)
+
+The Deltalake Python package lets you access tables stored in
+[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or
+the JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert
+any Delta table into a pandas DataFrame.
+
+### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas)
+
+pandas-gbq provides high performance reads and writes to and from
+[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0),
+these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`.
+Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead.
+
+### [ArcticDB](https://github.com/man-group/ArcticDB)
+
+ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage, and can be installed in seconds. Please find the full documentation [here](https://docs.arcticdb.io/latest/).
+
+#### ArcticDB Terminology
+
+ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components:
+
+- `Object Store` Collections of libraries. 
Used to separate logical environments from each other. Analogous to a database server.
+- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc.). Analogous to a database.
+- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables.
+- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object.
+
+#### Installation
+
+To install, simply run:
+
+```console
+pip install arcticdb
+```
+
+To get started, we can import ArcticDB and instantiate it:
+
+```python
+import arcticdb as adb
+import numpy as np
+import pandas as pd
+# this will set up the storage using the local file system
+arctic = adb.Arctic("lmdb://arcticdb_test")
+```
+
+> **Note:** ArcticDB supports any S3 API-compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\
+> ArcticDB also supports LMDB for local/file-based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`.
+
+#### Library Setup
+
+ArcticDB is geared towards storing many (potentially millions of) tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must first be initialized prior to use:
+
+```python
+lib = arctic.get_library('sample', create_if_missing=True)
+```
+
+#### Writing Data to ArcticDB
+
+Now that we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage.
+
+```python
+df = pd.DataFrame(
+    {
+        "a": list("abc"),
+        "b": list(range(1, 4)),
+        "c": np.arange(3, 6).astype("u1"),
+        "d": np.arange(4.0, 7.0, dtype="float64"),
+        "e": [True, False, True],
+        "f": pd.date_range("20130101", periods=3)
+    }
+)
+
+df
+df.dtypes
+```
+
+Write to ArcticDB.
+
+```python
+write_record = lib.write("test", df)
+```
+
+> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types:
+>
+> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index)
+> - `RangeIndex`
+> - `DatetimeIndex`
+> - `MultiIndex` composed of the above supported types
+>
+> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc').
+
+#### Reading Data from ArcticDB
+
+Read the data back from storage:
+
+```python
+read_record = lib.read("test")
+read_record.data
+df.dtypes
+```
+
+ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder).
+
+### [Hugging Face](https://huggingface.co/datasets)
+
+The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library.
+
+You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`. 
+
+For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb):
+
+```python
+import pandas as pd
+
+# Load the IMDB dataset
+df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet")
+```
+
+Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas.
+
+To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`:
+
+```python
+# Save the dataset to my Hugging Face account
+df.to_parquet("hf://datasets/username/dataset_name/train.parquet")
+```
+
+You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets).
+
+## Out-of-core
+
+### [Bodo](https://github.com/bodo-ai/Bodo)
+
+Bodo is a high-performance compute engine for Python data processing.
+Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas
+workloads from laptops to clusters without major code changes.
+Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology, making it
+both easier to use and often much faster than alternatives.
+Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently.
+
+```python
+import pandas as pd
+import bodo
+
+@bodo.jit
+def process_data():
+    df = pd.read_parquet("my_data.pq")
+    df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)})
+    df2.to_parquet("out.pq")
+
+process_data()
+```
+
+### [Cylon](https://cylondata.org/)
+
+Cylon is a fast, scalable, distributed memory parallel runtime with a pandas-like
+Python DataFrame API. "Core Cylon" is implemented with C++ using the Apache
+Arrow format to represent the data in-memory. The Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize the Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can be run in multiple engines. 
It helps in bridging the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multi-cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv")  # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for out-of-core DataFrames (similar to pandas) that can
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count, standard deviation etc., on an N-dimensional
+grid at up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a zero
+memory copy policy and lazy computations for best performance (no memory
+wasted).
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
+
+### [Hail Query](https://hail.is/)
+
+An out-of-core, preemptible-safe, distributed dataframe library serving
+the genetics community. Hail Query ships with on-disk data formats,
+in-memory data formats, an expression compiler, a query planner, and a
+distributed sort algorithm, all designed to accelerate queries on large
+matrices of genome sequencing data.
+
+It is often easiest to use pandas to manipulate the summary statistics or
+other small aggregates produced by Hail. For this reason, Hail provides
+native import from and export to pandas DataFrames:
+
+- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas)
+- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas)
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
+
+### [Pandera](https://pandera.readthedocs.io/en/stable/)
+
+Pandera provides a flexible and expressive API for performing data validation on dataframes
+to make data processing pipelines more readable and robust.
+Dataframes contain information that pandera explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings. 
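+
+As a small, hedged sketch of what such a runtime check looks like (the
+schema and column names below are illustrative, not taken from the pandera
+documentation):
+
+```python
+import pandas as pd
+import pandera as pa
+
+# Declare the expectations a valid dataframe must satisfy
+schema = pa.DataFrameSchema(
+    {
+        "price": pa.Column(float, checks=pa.Check.ge(0)),
+        "ticker": pa.Column(str),
+    }
+)
+
+df = pd.DataFrame({"price": [10.5, 20.1], "ticker": ["AAPL", "MSFT"]})
+validated = schema.validate(df)  # raises a SchemaError if a check fails
+```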
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system.
+The following libraries implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [awkward-pandas](https://github.com/scikit-hep/awkward)
+
+Awkward-pandas provides an extension type for storing Awkward
+Arrays inside pandas' Series and
+DataFrame. It also provides an accessor for using awkward functions
+on Series that are of awkward type.
+
+### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
+
+db-dtypes provides extension types for working with types like
+DATE, TIME, and JSON from database systems. This package is used
+by pandas-gbq to provide natural dtypes for BigQuery data types without
+a natural numpy type.
+
+### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
+
+Pandas-Genomics provides an extension type and extension array for working
+with genomics data. It also includes `genomics` accessors for many useful properties
+and methods related to QC and analysis of genomics data.
+
+### [Physipandas](https://github.com/mocquin/physipandas)
+
+Physipandas provides an extension for manipulating physical quantities
+(like scalars and numpy.ndarray) in association with a physical unit
+(like meter or joule) and additional features for integration of
+`physipy` accessors with pandas Series and DataFrame.
+
+### [Pint-Pandas](https://github.com/hgrecco/pint-pandas)
+
+Pint-Pandas provides an extension type for storing numeric arrays with units.
+These arrays can be stored inside pandas' Series and DataFrame. Operations
+between Series and DataFrame columns which use pint's extension array are then
+units-aware.
+
+### [Text Extensions](https://ibm.biz/text-extensions-for-pandas)
+
+Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames.
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace. 
+
+| Library                                                              | Accessor   | Classes               |
+| -------------------------------------------------------------------- | ---------- | --------------------- |
+| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/)   | `ak`       | `Series`              |
+| [pdvega](https://altair-viz.github.io/pdvega/)                       | `vgplot`   | `Series`, `DataFrame` |
+| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` |
+| [pint-pandas](https://github.com/hgrecco/pint-pandas)                | `pint`     | `Series`, `DataFrame` |
+| [physipandas](https://github.com/mocquin/physipandas)                | `physipy`  | `Series`, `DataFrame` |
+| [composeml](https://github.com/alteryx/compose)                      | `slice`    | `DataFrame`           |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)         | `gppd`     | `Series`, `DataFrame` |
+| [staircase](https://www.staircase.dev/)                              | `sc`       | `Series`, `DataFrame` |
+| [woodwork](https://github.com/alteryx/woodwork)                      | `ww`       | `Series`, `DataFrame` |
+
+## Development tools
+
+### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs)
+
+While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of the pandas API.
+
+Learn more by reading through issues [14468](https://github.com/pandas-dev/pandas/issues/14468),
+[26766](https://github.com/pandas-dev/pandas/issues/26766) and [28142](https://github.com/pandas-dev/pandas/issues/28142).
+
+See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs).
+
+### [Hamilton](https://github.com/dagworks-inc/hamilton)
+
+Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help you manage a
+pandas code base, specifically with respect to feature engineering for machine learning models.
+
+It prescribes an opinionated paradigm that ensures all code is:
+
+- unit testable
+- integration-testing friendly
+- documentation friendly
+- reusable, as transformation logic is decoupled from the context where it is used
+- able to integrate with runtime data quality checks
+
+This helps you scale your pandas code base while keeping maintenance costs low.
+
+For more information, see the [documentation](https://hamilton.readthedocs.io/).

From 30950cd09673d79de3d7655b1ce4aa389f3101ae Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:09:01 -0500
Subject: [PATCH 118/184] New translations ecosystem.md (Russian)

---
 web/pandas/ru/community/ecosystem.md | 738 +++++++++++++++++++++++++++
 1 file changed, 738 insertions(+)
 create mode 100644 web/pandas/ru/community/ecosystem.md

diff --git a/web/pandas/ru/community/ecosystem.md b/web/pandas/ru/community/ecosystem.md
new file mode 100644
index 000000000..f8e721d83
--- /dev/null
+++ b/web/pandas/ru/community/ecosystem.md
@@ -0,0 +1,738 @@
+# Ecosystem
+
+Increasingly, packages are being built on top of pandas to address
+specific needs in data preparation, analysis and visualization. This is
+encouraging because it means pandas is not only helping users to handle
+their data tasks but also providing a better starting point for
+developers to build powerful and more focused data tools. The creation
+of libraries that complement pandas' functionality also allows pandas
+development to remain focused around its original requirements.
+
+This is a community-maintained list of projects that build on pandas in order
+to provide tools in the PyData space. 
The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library.
+
+For a more complete list of projects that depend on pandas, see the libraries.io usage page for
+pandas or search pypi for
+pandas.
+
+We'd like to make it easier for users to find these projects. If you
+know of other substantial projects that you feel should be on this list,
+please let us know.
+
+## Statistics and machine learning
+
+### [Statsmodels](https://www.statsmodels.org/)
+
+Statsmodels is the prominent Python "statistics and econometrics
+library" and it has a long-standing special relationship with pandas.
+Statsmodels provides powerful statistics, econometrics, analysis and
+modeling functionality that is out of pandas' scope. Statsmodels
+leverages pandas objects as the underlying data container for
+computation.
+
+### [skrub](https://skrub-data.org)
+
+Skrub facilitates machine learning on dataframes. It bridges pandas
+to scikit-learn and related libraries. In particular, it facilitates building
+features from dataframes.
+
+### [Featuretools](https://github.com/alteryx/featuretools/)
+
+Featuretools is a Python library for automated feature engineering built
+on top of pandas. It excels at transforming temporal and relational
+datasets into feature matrices for machine learning using reusable
+feature engineering "primitives". Users can contribute their own
+primitives in Python and share them with the rest of the community.
+
+### [Compose](https://github.com/alteryx/compose)
+
+Compose is a machine learning tool for labeling data and prediction engineering.
+It allows you to structure the labeling process by parameterizing
+prediction problems and transforming time-driven relational data into
+target values with cutoff times that can be used for supervised learning.
+
+### [STUMPY](https://github.com/TDAmeritrade/stumpy)
+
+STUMPY is a powerful and scalable Python library for modern time series analysis.
+At its core, STUMPY efficiently computes something called a
+[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html),
+which can be used for a wide variety of time series data mining tasks.
+
+## Visualization
+
+### [Altair](https://altair-viz.github.io/)
+
+Altair is a declarative statistical visualization library for Python.
+With Altair, you can spend more time understanding your data and its
+meaning. Altair's API is simple, friendly and consistent and built on
+top of the powerful Vega-Lite JSON specification. This elegant
+simplicity produces beautiful and effective visualizations with a
+minimal amount of code. Altair works with Pandas DataFrames.
+
+### [Bokeh](https://docs.bokeh.org)
+
+Bokeh is a Python interactive visualization library for large datasets
+that natively uses the latest web technologies. Its goal is to provide
+elegant, concise construction of novel graphics in the style of
+Protovis/D3, while delivering high-performance interactivity over large
+data to thin clients.
+
+[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a
+high-level API for Bokeh that can be loaded as a native Pandas plotting
+backend via

+```
+pd.set_option("plotting.backend", "pandas_bokeh")
+```
+
+It is very similar to the matplotlib plotting backend, but provides
+interactive web-based charts and maps. 
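+
+As a minimal sketch of what this enables (assuming `pandas_bokeh` is
+installed; the DataFrame and column names are illustrative), the regular
+pandas plotting API then returns interactive Bokeh charts:
+
+```python
+import pandas as pd
+import pandas_bokeh  # makes the "pandas_bokeh" plotting backend available
+
+pd.set_option("plotting.backend", "pandas_bokeh")
+
+df = pd.DataFrame({"year": [2020, 2021, 2022], "sales": [10, 30, 20]})
+df.plot(kind="line")  # renders an interactive Bokeh chart instead of a matplotlib figure
+```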
+
+### [pygwalker](https://github.com/Kanaries/pygwalker)
+
+PyGWalker is an interactive data visualization and
+exploratory data analysis tool built upon Graphic Walker
+with support for visualization, cleaning, and annotation workflows.
+
+pygwalker can save interactively created charts
+to Graphic-Walker and Vega-Lite JSON.
+
+```
+import pygwalker as pyg
+pyg.walk(df)
+```
+
+### [seaborn](https://seaborn.pydata.org)
+
+Seaborn is a Python visualization library based on
+[matplotlib](https://matplotlib.org). It provides a high-level,
+dataset-oriented interface for creating attractive statistical graphics.
+The plotting functions in seaborn understand pandas objects and leverage
+pandas grouping operations internally to support concise specification
+of complex visualizations. Seaborn also goes beyond matplotlib and
+pandas with the option to perform statistical estimation while plotting,
+aggregating across observations and visualizing the fit of statistical
+models to emphasize patterns in a dataset.
+
+```
+import seaborn as sns
+sns.set_theme()
+```
+
+### [plotnine](https://github.com/has2k1/plotnine/)
+
+Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a
+foundational exploratory visualization package for the R language. Based
+on "The Grammar of
+Graphics",
+it provides a powerful, declarative and extremely general way to
+generate bespoke plots of any kind of data.
+Implementations for various other languages are available.
+A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/).
+
+### [IPython Vega](https://github.com/vega/ipyvega)
+
+[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook.
+
+### [Plotly](https://plot.ly/python)
+
+[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/)
+enables interactive figures and web shareability. Maps, 2D, 3D, and
+live-streaming graphs are rendered with WebGL and
+[D3.js](https://d3js.org/). The library supports plotting directly from
+a pandas DataFrame and cloud-based collaboration. Users of matplotlib,
+ggplot for Python, and
+Seaborn can
+convert figures into interactive web-based plots. Plots can be drawn in
+[IPython Notebooks](https://plot.ly/ipython-notebooks/), edited with R
+or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly
+is free for unlimited sharing, and has cloud, offline, or on-premise
+accounts for private use.
+
+### [Lux](https://github.com/lux-org/lux)
+
+Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas:
+
+```python
+import lux
+import pandas as pd
+
+df = pd.read_csv("data.csv")
+df # discover interesting insights!
+```
+
+By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allows users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. 
+
+### [D-Tale](https://github.com/man-group/dtale)
+
+D-Tale is a lightweight web client for visualizing pandas data structures. It
+provides a rich spreadsheet-style grid which acts as a wrapper for a lot of
+pandas functionality (query, sort, describe, corr...) so users can quickly
+manipulate their data. There is also an interactive chart-builder using Plotly
+Dash allowing users to build nice portable visualizations. D-Tale can be
+invoked with the following command:
+
+```python
+import dtale
+
+dtale.show(df)
+```
+
+D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle
+& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1).
+
+### [hvplot](https://hvplot.holoviz.org/index.html)
+
+hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/).
+It can be loaded as a native pandas plotting backend via
+
+```python
+pd.set_option("plotting.backend", "hvplot")
+```
+
+## IDE
+
+### [IPython](https://ipython.org/documentation.html)
+
+IPython is an interactive command shell and distributed computing
+environment. IPython tab completion works with Pandas methods and also
+attributes like DataFrame columns.
+
+### [Jupyter Notebook / Jupyter Lab](https://jupyter.org)
+
+Jupyter Notebook is a web application for creating Jupyter notebooks. A
+Jupyter notebook is a JSON document containing an ordered list of
+input/output cells which can contain code, text, mathematics, plots and
+rich media. Jupyter notebooks can be converted to a number of open
+standard output formats (HTML, HTML presentation slides, LaTeX, PDF,
+ReStructuredText, Markdown, Python) through 'Download As' in the web
+interface and `jupyter nbconvert` in a shell.
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
+
+See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html)
+for pandas `display.` settings.
+
+### [Spyder](https://www.spyder-ide.org/)
+
+Spyder is a cross-platform PyQt-based IDE combining the editing,
+analysis, debugging and profiling functionality of a software
+development tool with the data exploration, interactive execution, deep
+inspection and rich visualization capabilities of a scientific
+environment like MATLAB or RStudio.
+
+Its Variable
+Explorer allows
+users to view, manipulate and edit pandas `Index`, `Series`, and
+`DataFrame` objects like a "spreadsheet", including copying and
+modifying values, sorting, displaying a "heatmap", converting data
+types and more. Pandas objects can also be renamed, duplicated, new
+columns added, copied/pasted to/from the clipboard (as TSV), and
+saved/loaded to/from a file. Spyder can also import data from a variety
+of plain text and binary files or the clipboard into a new pandas
+DataFrame via a sophisticated import wizard.
+
+Most pandas classes, methods and data attributes can be autocompleted in
+Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython
+Console, and Spyder's
+[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and
+render Numpydoc documentation on pandas objects in rich text with Sphinx
+both automatically and on-demand. 
+
+### [marimo](https://marimo.io)
+
+marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun:
+
+1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities.
+2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes.
+3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook.
+4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns.
+5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively.
+6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory.
+
+## API
+
+### [pandas-datareader](https://github.com/pydata/pandas-datareader)
+
+`pandas-datareader` is a remote data access library for pandas
+(PyPI:`pandas-datareader`). It is based on functionality that was
+located in `pandas.io.data` and `pandas.io.wb` but was split off in
+v0.19. See more in the pandas-datareader
+docs.
+
+The following data feeds are available:
+
+- Google Finance
+- Tiingo
+- Morningstar
+- IEX
+- Robinhood
+- Enigma
+- Quandl
+- FRED
+- Fama/French
+- World Bank
+- OECD
+- Eurostat
+- TSP Fund Data
+- Nasdaq Trader Symbol Definitions
+- Stooq Index Data
+- MOEX Data
+
+### [pandaSDMX](https://pandasdmx.readthedocs.io)
+
+pandaSDMX is a library to retrieve and acquire statistical data and
+metadata disseminated in [SDMX](https://sdmx.org) 2.1, an
+ISO-standard widely used by institutions such as statistics offices,
+central banks, and international organisations. pandaSDMX can expose
+datasets and related structural metadata including data flows,
+code-lists, and data structure definitions as pandas Series or
+MultiIndexed DataFrames.
+
+### [fredapi](https://github.com/mortada/fredapi)
+
+fredapi is a Python interface to the Federal Reserve Economic Data
+(FRED) provided by the Federal Reserve
+Bank of St. Louis. It works with both the FRED database and the ALFRED
+database, which contains point-in-time data (i.e. historic data
+revisions). fredapi provides a wrapper in Python to the FRED HTTP API,
+and also provides several convenient methods for parsing and analyzing
+point-in-time data from ALFRED. fredapi makes use of pandas and returns
+data in a Series or DataFrame. This module requires a FRED API key that
+you can obtain for free on the FRED website.
+
+## Domain specific
+
+### [Geopandas](https://github.com/geopandas/geopandas)
+
+Geopandas extends pandas data objects to include geographic information
+which supports geometric operations. If your work entails maps and
+geographical coordinates, and you love pandas, you should take a close
+look at Geopandas.
+
+### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)
+
+gurobipy-pandas provides a convenient accessor API to connect pandas with
+gurobipy. It enables users to more easily and efficiently build mathematical
+optimization models from data stored in DataFrames and Series, and to read
+solutions back directly as pandas objects. 
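+
+A minimal sketch of that accessor-style workflow (the model and data below
+are illustrative, not taken from the gurobipy-pandas documentation):
+
+```python
+import gurobipy as gp
+import gurobipy_pandas as gppd
+import pandas as pd
+
+data = pd.DataFrame({"cost": [1.0, 2.0, 3.0]})
+model = gp.Model()
+
+# One decision variable per DataFrame row, returned as a pandas Series
+x = gppd.add_vars(model, data, name="x")
+
+model.setObjective((data["cost"] * x).sum(), sense=gp.GRB.MINIMIZE)
+model.optimize()
+
+solution = x.gppd.X  # solution values come back as a pandas Series
+```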
+
+### [staircase](https://github.com/staircase-dev/staircase)
+
+staircase is a data analysis package, built upon pandas and numpy, for modelling and
+manipulation of mathematical step functions. It provides a rich variety of arithmetic
+operations, relational operations, logical operations, statistical operations and
+aggregations for step functions defined over real numbers, datetime and timedelta domains.
+
+### [xarray](https://github.com/pydata/xarray)
+
+xarray brings the labeled data power of pandas to the physical sciences
+by providing N-dimensional variants of the core pandas data structures.
+It aims to provide a pandas-like and pandas-compatible toolkit for
+analytics on multi-dimensional arrays, rather than the tabular data for
+which pandas excels.
+
+## IO
+
+### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas)
+
+NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly.
+
+It supports the following data types:
+
+- pandas data types
+- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm)
+- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/)
+
+The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema).
+
+Example:
+
+```python
+import ntv_pandas as npd
+
+jsn = df.npd.to_json(table=False)  # save df as a JSON value (Table Schema format if table is True, else NTV format)
+df = npd.read_json(jsn)  # load a JSON value as a `DataFrame`
+
+df.equals(npd.read_json(df.npd.to_json()))  # `True` in either case, whether `table=True` or not
+```
+
+### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas)
+
+BCPandas provides high performance writes from pandas to Microsoft SQL Server,
+far exceeding the performance of the native `df.to_sql` method. Internally, it uses
+Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
+Rigorously tested, it is a complete replacement for `df.to_sql`.
+
+### [Deltalake](https://pypi.org/project/deltalake)
+
+The Deltalake Python package lets you access tables stored in
+[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or
+the JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert
+any Delta table into a pandas DataFrame.
+
+### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas)
+
+pandas-gbq provides high performance reads and writes to and from
+[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0),
+these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`.
+Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead.
+
+### [ArcticDB](https://github.com/man-group/ArcticDB)
+
+ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage, and can be installed in seconds. Please find the full documentation [here](https://docs.arcticdb.io/latest/).
+
+#### ArcticDB Terminology
+
+ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components:
+
+- `Object Store` Collections of libraries. 
Used to separate logical environments from each other. Analogous to a database server.
+- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc.). Analogous to a database.
+- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables.
+- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object.
+
+#### Installation
+
+To install, simply run:
+
+```console
+pip install arcticdb
+```
+
+To get started, we can import ArcticDB and instantiate it:
+
+```python
+import arcticdb as adb
+import numpy as np
+import pandas as pd
+# this will set up the storage using the local file system
+arctic = adb.Arctic("lmdb://arcticdb_test")
+```
+
+> **Note:** ArcticDB supports any S3 API-compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\
+> ArcticDB also supports LMDB for local/file-based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`.
+
+#### Library Setup
+
+ArcticDB is geared towards storing many (potentially millions of) tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must first be initialized prior to use:
+
+```python
+lib = arctic.get_library('sample', create_if_missing=True)
+```
+
+#### Writing Data to ArcticDB
+
+Now that we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage.
+
+```python
+df = pd.DataFrame(
+    {
+        "a": list("abc"),
+        "b": list(range(1, 4)),
+        "c": np.arange(3, 6).astype("u1"),
+        "d": np.arange(4.0, 7.0, dtype="float64"),
+        "e": [True, False, True],
+        "f": pd.date_range("20130101", periods=3)
+    }
+)
+
+df
+df.dtypes
+```
+
+Write to ArcticDB.
+
+```python
+write_record = lib.write("test", df)
+```
+
+> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types:
+>
+> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index)
+> - `RangeIndex`
+> - `DatetimeIndex`
+> - `MultiIndex` composed of the above supported types
+>
+> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc').
+
+#### Reading Data from ArcticDB
+
+Read the data back from storage:
+
+```python
+read_record = lib.read("test")
+read_record.data
+df.dtypes
+```
+
+ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder).
+
+### [Hugging Face](https://huggingface.co/datasets)
+
+The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library.
+
+You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`. 
+
+For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb):
+
+```python
+import pandas as pd
+
+# Load the IMDB dataset
+df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet")
+```
+
+Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas.
+
+To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`:
+
+```python
+# Save the dataset to my Hugging Face account
+df.to_parquet("hf://datasets/username/dataset_name/train.parquet")
+```
+
+You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets).
+
+## Out-of-core
+
+### [Bodo](https://github.com/bodo-ai/Bodo)
+
+Bodo is a high-performance compute engine for Python data processing.
+Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas
+workloads from laptops to clusters without major code changes.
+Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology, making it
+both easier to use and often much faster than alternatives.
+Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently.
+
+```python
+import pandas as pd
+import bodo
+
+@bodo.jit
+def process_data():
+    df = pd.read_parquet("my_data.pq")
+    df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)})
+    df2.to_parquet("out.pq")
+
+process_data()
+```
+
+### [Cylon](https://cylondata.org/)
+
+Cylon is a fast, scalable, distributed memory parallel runtime with a pandas-like
+Python DataFrame API. "Core Cylon" is implemented with C++ using the Apache
+Arrow format to represent the data in-memory. The Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize the Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can be run in multiple engines. 
It helps bridge the gap between local Python environments (like pandas) and remote storage and execution systems such as Hadoop components (HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multiple cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv")  # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for out-of-core DataFrames (similar to pandas), built to
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count and standard deviation on an N-dimensional
+grid at up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a zero-memory-copy
+policy and lazy computations for best performance (no memory wasted).
+
+Conversion to and from pandas is provided by:
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
+
+### [Hail Query](https://hail.is/)
+
+An out-of-core, preemptible-safe, distributed dataframe library serving
+the genetics community. Hail Query ships with on-disk data formats,
+in-memory data formats, an expression compiler, a query planner, and a
+distributed sort algorithm all designed to accelerate queries on large
+matrices of genome sequencing data.
+
+It is often easiest to use pandas to manipulate the summary statistics or
+other small aggregates produced by Hail. For this reason, Hail provides
+native import to and export from pandas DataFrames:
+
+- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas)
+- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas)
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
+
+### [Pandera](https://pandera.readthedocs.io/en/stable/)
+
+Pandera provides a flexible and expressive API for performing data validation on dataframes
+to make data processing pipelines more readable and robust.
+Dataframes contain information that pandera explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings.
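+
+As a minimal sketch of what a pandera schema looks like (the column names and checks here are illustrative, assuming the classic `pandera` API):
+
+```python
+import pandas as pd
+import pandera as pa
+
+# Hypothetical schema: prices must be non-negative floats, tickers strings
+schema = pa.DataFrameSchema(
+    {
+        "price": pa.Column(float, pa.Check.ge(0)),
+        "ticker": pa.Column(str),
+    }
+)
+
+df = pd.DataFrame({"price": [10.5, 12.0], "ticker": ["AAA", "BBB"]})
+validated = schema.validate(df)  # raises a SchemaError if a check fails
+```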
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system.
+The following libraries implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [awkward-pandas](https://github.com/scikit-hep/awkward)
+
+Awkward-pandas provides an extension type for storing Awkward
+Arrays inside pandas' Series and
+DataFrame. It also provides an accessor for using awkward functions
+on Series that are of awkward type.
+
+### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
+
+db-dtypes provides extension types for working with types like
+DATE, TIME, and JSON from database systems. This package is used
+by pandas-gbq to provide natural dtypes for BigQuery data types without
+a natural NumPy type.
+
+### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
+
+Pandas-Genomics provides an extension type and extension array for working
+with genomics data. It also includes `genomics` accessors for many useful properties
+and methods related to QC and analysis of genomics data.
+
+### [Physipandas](https://github.com/mocquin/physipandas)
+
+Physipandas provides an extension for manipulating physical quantities
+(as scalars or numpy.ndarray values) in association with a physical unit
+(like meter or joule), plus additional features for integrating
+`physipy` accessors with pandas Series and DataFrame.
+
+### [Pint-Pandas](https://github.com/hgrecco/pint-pandas)
+
+Pint-Pandas provides an extension type for storing numeric arrays with units.
+These arrays can be stored inside pandas' Series and DataFrame. Operations
+between Series and DataFrame columns which use pint's extension array are then
+unit-aware.
+
+### [Text Extensions](https://ibm.biz/text-extensions-for-pandas)
+
+Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames.
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace.
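+
+For library authors, accessors such as those in the table below are registered through pandas' public extension API. A minimal sketch, using a hypothetical `metrics` namespace:
+
+```python
+import pandas as pd
+
+
+@pd.api.extensions.register_dataframe_accessor("metrics")  # "metrics" is an illustrative name
+class MetricsAccessor:
+    def __init__(self, pandas_obj: pd.DataFrame):
+        self._obj = pandas_obj
+
+    def zscore(self, column: str) -> pd.Series:
+        # standard score of one column of the wrapped DataFrame
+        s = self._obj[column]
+        return (s - s.mean()) / s.std()
+
+
+df = pd.DataFrame({"a": [1.0, 2.0, 3.0]})
+print(df.metrics.zscore("a"))
+```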
+
+| Library                                                              | Accessor   | Classes               |
+| -------------------------------------------------------------------- | ---------- | --------------------- |
+| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/)   | `ak`       | `Series`              |
+| [pdvega](https://altair-viz.github.io/pdvega/)                       | `vgplot`   | `Series`, `DataFrame` |
+| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` |
+| [pint-pandas](https://github.com/hgrecco/pint-pandas)                | `pint`     | `Series`, `DataFrame` |
+| [physipandas](https://github.com/mocquin/physipandas)                | `physipy`  | `Series`, `DataFrame` |
+| [composeml](https://github.com/alteryx/compose)                      | `slice`    | `DataFrame`           |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)         | `gppd`     | `Series`, `DataFrame` |
+| [staircase](https://www.staircase.dev/)                              | `sc`       | `Series`, `DataFrame` |
+| [woodwork](https://github.com/alteryx/woodwork)                      | `ww`       | `Series`, `DataFrame` |
+
+## Development tools
+
+### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs)
+
+While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of the pandas API.
+
+Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468),
+[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142).
+
+See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs).
+
+### [Hamilton](https://github.com/dagworks-inc/hamilton)
+
+Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a
+pandas code base, specifically with respect to feature engineering for machine learning models.
+
+It prescribes an opinionated paradigm that ensures all code is:
+
+- unit testable
+- integration testing friendly
+- documentation friendly
+- reusable, since transformation logic is decoupled from the context in which it is used
+- integrable with runtime data quality checks
+
+This helps you scale a pandas code base while keeping maintenance costs low.
+
+For more information, see the [documentation](https://hamilton.readthedocs.io/).

From 23f5b0df526db6c5abc3053b368ff9cf36e54f78 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:09:03 -0500
Subject: [PATCH 119/184] New translations ecosystem.md (Chinese Simplified)

---
 web/pandas/zh/community/ecosystem.md | 738 +++++++++++++++++++++++++++
 1 file changed, 738 insertions(+)
 create mode 100644 web/pandas/zh/community/ecosystem.md

diff --git a/web/pandas/zh/community/ecosystem.md b/web/pandas/zh/community/ecosystem.md
new file mode 100644
index 000000000..f8e721d83
--- /dev/null
+++ b/web/pandas/zh/community/ecosystem.md
@@ -0,0 +1,738 @@
+# Ecosystem
+
+Increasingly, packages are being built on top of pandas to address
+specific needs in data preparation, analysis and visualization. This is
+encouraging because it means pandas is not only helping users to handle
+their data tasks but also that it provides a better starting point for
+developers to build powerful and more focused data tools. The creation
+of libraries that complement pandas' functionality also allows pandas
+development to remain focused around its original requirements.
+
+This is a community-maintained list of projects that build on pandas in order
+to provide tools in the PyData space.
The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library.
+
+For a more complete list of projects that depend on pandas, see the libraries.io usage page for
+pandas or search pypi for
+pandas.
+
+We'd like to make it easier for users to find these projects. If you
+know of other substantial projects that you feel should be on this list,
+please let us know.
+
+## Statistics and machine learning
+
+### [Statsmodels](https://www.statsmodels.org/)
+
+Statsmodels is the prominent Python "statistics and econometrics
+library" and it has a long-standing special relationship with pandas.
+Statsmodels provides powerful statistics, econometrics, analysis and
+modeling functionality that is out of pandas' scope. Statsmodels
+leverages pandas objects as the underlying data container for
+computation.
+
+### [skrub](https://skrub-data.org)
+
+Skrub facilitates machine learning on dataframes. It bridges pandas
+to scikit-learn and related libraries. In particular, it facilitates building
+features from dataframes.
+
+### [Featuretools](https://github.com/alteryx/featuretools/)
+
+Featuretools is a Python library for automated feature engineering built
+on top of pandas. It excels at transforming temporal and relational
+datasets into feature matrices for machine learning using reusable
+feature engineering "primitives". Users can contribute their own
+primitives in Python and share them with the rest of the community.
+
+### [Compose](https://github.com/alteryx/compose)
+
+Compose is a machine learning tool for labeling data and prediction engineering.
+It allows you to structure the labeling process by parameterizing
+prediction problems and transforming time-driven relational data into
+target values with cutoff times that can be used for supervised learning.
+
+### [STUMPY](https://github.com/TDAmeritrade/stumpy)
+
+STUMPY is a powerful and scalable Python library for modern time series analysis.
+At its core, STUMPY efficiently computes something called a
+[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html),
+which can be used for a wide variety of time series data mining tasks.
+
+## Visualization
+
+### [Altair](https://altair-viz.github.io/)
+
+Altair is a declarative statistical visualization library for Python.
+With Altair, you can spend more time understanding your data and its
+meaning. Altair's API is simple, friendly and consistent and built on
+top of the powerful Vega-Lite JSON specification. This elegant
+simplicity produces beautiful and effective visualizations with a
+minimal amount of code. Altair works with pandas DataFrames.
+
+### [Bokeh](https://docs.bokeh.org)
+
+Bokeh is a Python interactive visualization library for large datasets
+that natively uses the latest web technologies. Its goal is to provide
+elegant, concise construction of novel graphics in the style of
+Protovis/D3, while delivering high-performance interactivity over large
+data to thin clients.
+
+[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a
+high-level API for Bokeh that can be loaded as a native pandas plotting
+backend via
+
+```
+pd.set_option("plotting.backend", "pandas_bokeh")
+```
+
+It is very similar to the matplotlib plotting backend, but provides
+interactive web-based charts and maps.
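+
+Once the backend option is set, ordinary `DataFrame.plot` calls dispatch to Bokeh. A small sketch, assuming the pandas-bokeh package is installed:
+
+```python
+import numpy as np
+import pandas as pd
+
+pd.set_option("plotting.backend", "pandas_bokeh")
+
+df = pd.DataFrame({"x": np.arange(10), "y": np.random.randn(10).cumsum()})
+df.plot()  # returns an interactive Bokeh figure instead of a matplotlib Axes
+```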
+ +### [pygwalker](https://github.com/Kanaries/pygwalker) + +PyGWalker is an interactive data visualization and +exploratory data analysis tool built upon Graphic Walker +with support for visualization, cleaning, and annotation workflows. + +pygwalker can save interactively created charts +to Graphic-Walker and Vega-Lite JSON. + +``` +import pygwalker as pyg +pyg.walk(df) +``` + +### [seaborn](https://seaborn.pydata.org) + +Seaborn is a Python visualization library based on +[matplotlib](https://matplotlib.org). It provides a high-level, +dataset-oriented interface for creating attractive statistical graphics. +The plotting functions in seaborn understand pandas objects and leverage +pandas grouping operations internally to support concise specification +of complex visualizations. Seaborn also goes beyond matplotlib and +pandas with the option to perform statistical estimation while plotting, +aggregating across observations and visualizing the fit of statistical +models to emphasize patterns in a dataset. + +``` +import seaborn as sns +sns.set_theme() +``` + +### [plotnine](https://github.com/has2k1/plotnine/) + +Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a +foundational exploratory visualization package for the R language. Based +on "The Grammar of +Graphics" +it provides a powerful, declarative and extremely general way to +generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/). + +### [IPython Vega](https://github.com/vega/ipyvega) + +[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook. + +### [Plotly](https://plot.ly/python) + +[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/) +enables interactive figures and web shareability. Maps, 2D, 3D, and +live-streaming graphs are rendered with WebGL and +[D3.js](https://d3js.org/). The library supports plotting directly from +a pandas DataFrame and cloud-based collaboration. Users of matplotlib, +ggplot for Python, and +Seaborn can +convert figures into interactive web-based plots. Plots can be drawn in +[IPython Notebooks](https://plot.ly/ipython-notebooks/) , edited with R +or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly +is free for unlimited sharing, and has cloud, offline, or on-premise +accounts for private use. + +### [Lux](https://github.com/lux-org/lux) + +Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas: + +```python +import lux +import pandas as pd + +df = pd.read_csv("data.csv") +df # discover interesting insights! +``` + +By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allow users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. 
+
+### [D-Tale](https://github.com/man-group/dtale)
+
+D-Tale is a lightweight web client for visualizing pandas data structures. It
+provides a rich spreadsheet-style grid which acts as a wrapper for a lot of
+pandas functionality (query, sort, describe, corr...) so users can quickly
+manipulate their data. There is also an interactive chart-builder using Plotly
+Dash allowing users to build nice portable visualizations. D-Tale can be
+invoked with the following command:
+
+```python
+import dtale
+
+dtale.show(df)
+```
+
+D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle
+& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1).
+
+### [hvplot](https://hvplot.holoviz.org/index.html)
+
+hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/).
+It can be loaded as a native pandas plotting backend via
+
+```python
+pd.set_option("plotting.backend", "hvplot")
+```
+
+## IDE
+
+### [IPython](https://ipython.org/documentation.html)
+
+IPython is an interactive command shell and distributed computing
+environment. IPython tab completion works with pandas methods and also
+attributes like DataFrame columns.
+
+### [Jupyter Notebook / Jupyter Lab](https://jupyter.org)
+
+Jupyter Notebook is a web application for creating Jupyter notebooks. A
+Jupyter notebook is a JSON document containing an ordered list of
+input/output cells which can contain code, text, mathematics, plots and
+rich media. Jupyter notebooks can be converted to a number of open
+standard output formats (HTML, HTML presentation slides, LaTeX, PDF,
+ReStructuredText, Markdown, Python) through 'Download As' in the web
+interface and `jupyter nbconvert` in a shell.
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
+
+See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html)
+for pandas `display.` settings.
+
+### [Spyder](https://www.spyder-ide.org/)
+
+Spyder is a cross-platform PyQt-based IDE combining the editing,
+analysis, debugging and profiling functionality of a software
+development tool with the data exploration, interactive execution, deep
+inspection and rich visualization capabilities of a scientific
+environment like MATLAB or RStudio.
+
+Its Variable
+Explorer allows
+users to view, manipulate and edit pandas `Index`, `Series`, and
+`DataFrame` objects like a "spreadsheet", including copying and
+modifying values, sorting, displaying a "heatmap", converting data
+types and more. Pandas objects can also be renamed, duplicated, new
+columns added, copied/pasted to/from the clipboard (as TSV), and
+saved/loaded to/from a file. Spyder can also import data from a variety
+of plain text and binary files or the clipboard into a new pandas
+DataFrame via a sophisticated import wizard.
+
+Most pandas classes, methods and data attributes can be autocompleted in
+Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython
+Console, and Spyder's
+[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and
+render Numpydoc documentation on pandas objects in rich text with Sphinx
+both automatically and on-demand.
+ +### [marimo](https://marimo.io) + +marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun: + +1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities. +2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes. +3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook. +4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns. +5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively. +6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory. + +## API + +### [pandas-datareader](https://github.com/pydata/pandas-datareader) + +`pandas-datareader` is a remote data access library for pandas +(PyPI:`pandas-datareader`). It is based on functionality that was +located in `pandas.io.data` and `pandas.io.wb` but was split off in +v0.19. See more in the pandas-datareader +docs: + +The following data feeds are available: + +- Google Finance +- Tiingo +- Morningstar +- IEX +- Robinhood +- Enigma +- Quandl +- FRED +- Fama/French +- World Bank +- OECD +- Eurostat +- TSP Fund Data +- Nasdaq Trader Symbol Definitions +- Stooq Index Data +- MOEX Data + +### [pandaSDMX](https://pandasdmx.readthedocs.io) + +pandaSDMX is a library to retrieve and acquire statistical data and +metadata disseminated in [SDMX](https://sdmx.org) 2.1, an +ISO-standard widely used by institutions such as statistics offices, +central banks, and international organisations. pandaSDMX can expose +datasets and related structural metadata including data flows, +code-lists, and data structure definitions as pandas Series or +MultiIndexed DataFrames. + +### [fredapi](https://github.com/mortada/fredapi) + +fredapi is a Python interface to the Federal Reserve Economic Data +(FRED) provided by the Federal Reserve +Bank of St. Louis. It works with both the FRED database and ALFRED +database that contains point-in-time data (i.e. historic data +revisions). fredapi provides a wrapper in Python to the FRED HTTP API, +and also provides several convenient methods for parsing and analyzing +point-in-time data from ALFRED. fredapi makes use of pandas and returns +data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. + +## Domain specific + +### [Geopandas](https://github.com/geopandas/geopandas) + +Geopandas extends pandas data objects to include geographic information +which support geometric operations. If your work entails maps and +geographical coordinates, and you love pandas, you should take a close +look at Geopandas. + +### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) + +gurobipy-pandas provides a convenient accessor API to connect pandas with +gurobipy. It enables users to more easily and efficiently build mathematical +optimization models from data stored in DataFrames and Series, and to read +solutions back directly as pandas objects. 
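+
+A hedged sketch of building a model from a DataFrame with gurobipy-pandas (the data and names are illustrative, and running it requires a Gurobi installation and license):
+
+```python
+import pandas as pd
+import gurobipy as gp
+from gurobipy import GRB
+import gurobipy_pandas as gppd
+
+# Hypothetical data: one decision variable per row of the index
+data = pd.DataFrame({"cost": [1.0, 2.0, 3.0]}, index=["a", "b", "c"])
+
+model = gp.Model()
+x = gppd.add_vars(model, data, name="x")  # a Series of gurobipy variables
+model.setObjective((data["cost"] * x).sum(), sense=GRB.MINIMIZE)
+model.optimize()
+```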
+
+### [staircase](https://github.com/staircase-dev/staircase)
+
+staircase is a data analysis package, built upon pandas and numpy, for modelling and
+manipulation of mathematical step functions. It provides a rich variety of arithmetic
+operations, relational operations, logical operations, statistical operations and
+aggregations for step functions defined over real numbers, datetime and timedelta domains.
+
+### [xarray](https://github.com/pydata/xarray)
+
+xarray brings the labeled data power of pandas to the physical sciences
+by providing N-dimensional variants of the core pandas data structures.
+It aims to provide a pandas-like and pandas-compatible toolkit for
+analytics on multi-dimensional arrays, rather than the tabular data for
+which pandas excels.
+
+## IO
+
+### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas)
+
+NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly.
+
+It supports the following data types:
+
+- pandas data types
+- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm)
+- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/)
+
+The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema).
+
+Example:
+
+```python
+import ntv_pandas as npd
+
+jsn = df.npd.to_json(table=False)  # save df as a JSON value (Table Schema format if table=True, NTV format otherwise)
+df = npd.read_json(jsn)  # load a JSON value as a `DataFrame`
+
+df.equals(npd.read_json(df.npd.to_json()))  # `True` in any case, whether `table=True` or not
+```
+
+### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas)
+
+BCPandas provides high-performance writes from pandas to Microsoft SQL Server,
+far exceeding the performance of the native `df.to_sql` method. Internally, it uses
+Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
+Rigorously tested, it is a complete replacement for `df.to_sql`.
+
+### [Deltalake](https://pypi.org/project/deltalake)
+
+The Deltalake Python package lets you access tables stored in
+[Delta Lake](https://delta.io/) natively in Python, without the need for Spark or
+a JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert
+any Delta table into a pandas DataFrame.
+
+### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas)
+
+pandas-gbq provides high-performance reads and writes to and from
+[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0),
+these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`.
+Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead.
+
+### [ArcticDB](https://github.com/man-group/ArcticDB)
+
+ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage, and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/).
+
+#### ArcticDB Terminology
+
+ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components:
+
+- `Object Store` Collections of libraries.
Used to separate logical environments from each other. Analogous to a database server.
+- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc.). Analogous to a database.
+- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables.
+- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object.
+
+#### Installation
+
+To install, simply run:
+
+```console
+pip install arcticdb
+```
+
+To get started, we can import ArcticDB and instantiate it:
+
+```python
+import arcticdb as adb
+import numpy as np
+import pandas as pd
+# this will set up the storage using the local file system
+arctic = adb.Arctic("lmdb://arcticdb_test")
+```
+
+> **Note:** ArcticDB supports any S3 API-compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\
+> ArcticDB also supports LMDB for local/file-based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`.
+
+#### Library Setup
+
+ArcticDB is geared towards storing many (potentially millions of) tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must be initialized prior to use:
+
+```python
+lib = arctic.get_library('sample', create_if_missing=True)
+```
+
+#### Writing Data to ArcticDB
+
+Now that we have a library set up, we can read and write data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage.
+
+```python
+df = pd.DataFrame(
+    {
+        "a": list("abc"),
+        "b": list(range(1, 4)),
+        "c": np.arange(3, 6).astype("u1"),
+        "d": np.arange(4.0, 7.0, dtype="float64"),
+        "e": [True, False, True],
+        "f": pd.date_range("20130101", periods=3)
+    }
+)
+
+df
+df.dtypes
+```
+
+Write the DataFrame to ArcticDB:
+
+```python
+write_record = lib.write("test", df)
+```
+
+> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types:
+>
+> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index)
+> - `RangeIndex`
+> - `DatetimeIndex`
+> - `MultiIndex` composed of the above supported types
+>
+> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc').
+
+#### Reading Data from ArcticDB
+
+Read the data back from storage:
+
+```python
+read_record = lib.read("test")
+read_record.data
+df.dtypes
+```
+
+ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder).
+
+### [Hugging Face](https://huggingface.co/datasets)
+
+The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library.
+
+You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`.
+
+For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb):
+
+```python
+import pandas as pd
+
+# Load the IMDB dataset
+df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet")
+```
+
+Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas.
+
+To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`:
+
+```python
+# Save the dataset to my Hugging Face account
+df.to_parquet("hf://datasets/username/dataset_name/train.parquet")
+```
+
+You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets).
+
+## Out-of-core
+
+### [Bodo](https://github.com/bodo-ai/Bodo)
+
+Bodo is a high-performance compute engine for Python data processing.
+Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling pandas
+workloads from laptops to clusters without major code changes.
+Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology, making it
+both easier to use and often much faster than alternatives.
+Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently.
+
+```python
+import pandas as pd
+import bodo
+
+@bodo.jit
+def process_data():
+    df = pd.read_parquet("my_data.pq")
+    df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)})
+    df2.to_parquet("out.pq")
+
+process_data()
+```
+
+### [Cylon](https://cylondata.org/)
+
+Cylon is a fast, scalable, distributed-memory parallel runtime with a pandas-like
+Python DataFrame API. "Core Cylon" is implemented in C++, using the Apache
+Arrow format to represent the data in memory. The Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrames by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize the Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can be run in multiple engines.
It helps bridge the gap between local Python environments (like pandas) and remote storage and execution systems such as Hadoop components (HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multiple cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv")  # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for out-of-core DataFrames (similar to pandas), built to
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count and standard deviation on an N-dimensional
+grid at up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a zero-memory-copy
+policy and lazy computations for best performance (no memory wasted).
+
+Conversion to and from pandas is provided by:
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
+
+### [Hail Query](https://hail.is/)
+
+An out-of-core, preemptible-safe, distributed dataframe library serving
+the genetics community. Hail Query ships with on-disk data formats,
+in-memory data formats, an expression compiler, a query planner, and a
+distributed sort algorithm all designed to accelerate queries on large
+matrices of genome sequencing data.
+
+It is often easiest to use pandas to manipulate the summary statistics or
+other small aggregates produced by Hail. For this reason, Hail provides
+native import to and export from pandas DataFrames:
+
+- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas)
+- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas)
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
+
+### [Pandera](https://pandera.readthedocs.io/en/stable/)
+
+Pandera provides a flexible and expressive API for performing data validation on dataframes
+to make data processing pipelines more readable and robust.
+Dataframes contain information that pandera explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings.
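+
+A small sketch of the method-chaining style pyjanitor (above) enables, with illustrative column names and assuming the `janitor` package is installed:
+
+```python
+import pandas as pd
+import janitor  # noqa: F401 -- importing registers the extra DataFrame methods
+
+raw = pd.DataFrame({"First Name": ["Ada", "Grace"], "Hire Year": [1950, 1952]})
+
+# clean_names() rewrites columns to snake_case, then normal pandas methods chain on
+cleaned = raw.clean_names().rename(columns={"hire_year": "start_year"})
+print(cleaned.columns.tolist())  # ['first_name', 'start_year']
+```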
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system.
+The following libraries implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [awkward-pandas](https://github.com/scikit-hep/awkward)
+
+Awkward-pandas provides an extension type for storing Awkward
+Arrays inside pandas' Series and
+DataFrame. It also provides an accessor for using awkward functions
+on Series that are of awkward type.
+
+### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
+
+db-dtypes provides extension types for working with types like
+DATE, TIME, and JSON from database systems. This package is used
+by pandas-gbq to provide natural dtypes for BigQuery data types without
+a natural NumPy type.
+
+### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
+
+Pandas-Genomics provides an extension type and extension array for working
+with genomics data. It also includes `genomics` accessors for many useful properties
+and methods related to QC and analysis of genomics data.
+
+### [Physipandas](https://github.com/mocquin/physipandas)
+
+Physipandas provides an extension for manipulating physical quantities
+(as scalars or numpy.ndarray values) in association with a physical unit
+(like meter or joule), plus additional features for integrating
+`physipy` accessors with pandas Series and DataFrame.
+
+### [Pint-Pandas](https://github.com/hgrecco/pint-pandas)
+
+Pint-Pandas provides an extension type for storing numeric arrays with units.
+These arrays can be stored inside pandas' Series and DataFrame. Operations
+between Series and DataFrame columns which use pint's extension array are then
+unit-aware.
+
+### [Text Extensions](https://ibm.biz/text-extensions-for-pandas)
+
+Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames.
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace.
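+
+As one concrete example, pint-pandas (listed in the table below) pairs an extension dtype with a `pint` accessor. A minimal sketch, assuming pint-pandas is installed:
+
+```python
+import pandas as pd
+import pint_pandas  # noqa: F401 -- registers the "pint[...]" dtype and the .pint accessor
+
+distance = pd.Series([1.0, 2.5, 4.0], dtype="pint[meter]")
+print(distance.pint.to("kilometer"))  # unit-aware conversion of the whole Series
+```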
+ +| Library | Accessor | Classes | +| -------------------------------------------------------------------- | ---------- | --------------------- | +| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` | +| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | +| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` | +| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | +| [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` | +| [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` | +| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` | +| [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` | +| [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` | + +## Development tools + +### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs) + +While pandas repository is partially typed, the package itself doesn't expose this information for external use. +Install pandas-stubs to enable basic type coverage of pandas API. + +Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468), +[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142). + +See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs). + +### [Hamilton](https://github.com/dagworks-inc/hamilton) + +Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a +Pandas code base, specifically with respect to feature engineering for machine learning models. + +It prescribes an opinionated paradigm, that ensures all code is: + +- unit testable +- integration testing friendly +- documentation friendly +- transformation logic is reusable, as it is decoupled from the context of where it is used. +- integratable with runtime data quality checks. + +This helps one to scale your pandas code base, at the same time, keeping maintenance costs low. + +For more information, see [documentation](https://hamilton.readthedocs.io/). From a3839024fba35cbafa6453f913121c5f7c4531bd Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:04 -0500 Subject: [PATCH 120/184] New translations ecosystem.md (Portuguese, Brazilian) --- web/pandas/pt/community/ecosystem.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/web/pandas/pt/community/ecosystem.md b/web/pandas/pt/community/ecosystem.md index e2bf55014..594c1147c 100644 --- a/web/pandas/pt/community/ecosystem.md +++ b/web/pandas/pt/community/ecosystem.md @@ -1,6 +1,7 @@ # Ecossistema Cada vez mais, os pacotes estão sendo construídos em cima do pandas para abordar +necessidades específicas na preparação de dados, análise e visualização. Cada vez mais, os pacotes estão sendo construídos em cima do pandas para abordar necessidades específicas na preparação de dados, análise e visualização. Isso é encorajador porque significa que pandas não só está ajudando os usuários a lidar com suas tarefas de dados, mas também que fornece um melhor ponto de partida para desenvolvedores @@ -15,8 +16,7 @@ Para uma lista mais completa de projetos que dependem dos pandas, veja a página pandas ou procure no pypi por pandas. 
-Gostaríamos de facilitar para os usuários encontrarem esses projetos. -Se você conhece outros projetos importantes que acha que deveriam +Gostaríamos de facilitar para os usuários encontrarem esses projetos. Se você conhece outros projetos importantes que acha que deveriam estar nesta lista, informe-nos. ## Estatísticas e aprendizado de máquina @@ -93,7 +93,7 @@ sns.set_theme() ### [plotnine](https://github.com/has2k1/plotnine/) -O [ggplot2](https://ggplot2.tidyverse.org/) de Hadley Wickham é um pacote de visualização exploratória fundamental para a linguagem R. Baseado em ["The Grammar of Graphics"](https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html), ele fornece uma maneira poderosa, declarativa e extremamente geral de gerar gráficos personalizados de qualquer tipo de dado. Várias implementações para outras linguagens estão disponíveis. Uma boa implementação para usuários de Python é has2k1/plotnine. +O [ggplot2](https://ggplot2.tidyverse.org/) de Hadley Wickham é um pacote de visualização exploratória fundamental para a linguagem R. Baseado em ["The Grammar of Graphics"](https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html), ele fornece uma maneira poderosa, declarativa e extremamente geral de gerar gráficos personalizados de qualquer tipo de dado. Baseado em ["The Grammar of Graphics"](https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html), ele fornece uma maneira poderosa, declarativa e extremamente geral de gerar gráficos personalizados de qualquer tipo de dado. Várias implementações para outras linguagens estão disponíveis. Uma boa implementação para usuários de Python é has2k1/plotnine. Várias implementações para outras linguagens estão disponíveis. Uma boa implementação para usuários de Python é [has2k1/plotnine](https://github.com/has2k1/plotnine/). @@ -234,7 +234,7 @@ O NTV-pandas fornece um conversor JSON com mais tipos de dados do que os suporta Ele suporta os seguintes tipos de dados: - tipos de dados do pandas -- tipos de dados definidos no [formato NTV](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm) +- tipos de dados definidos no [formato NTV](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\\(JSON-NTV\\).htm) - tipos de dados definidos na [especificação Table Schema](http://dataprotocols.org/json-table-schema/#field-types-and-formats) A interface é sempre reversível (conversão de ida e volta) com dois formatos (JSON-NTV e JSON-TableSchema). @@ -273,7 +273,7 @@ O ArcticDB é um mecanismo de banco de dados DataFrame sem servidor projetado pa O ArcticDB é estruturado para fornecer uma maneira escalável e eficiente de gerenciar e recuperar DataFrames, organizados em vários componentes principais: - Coleções de bibliotecas de `Object Store`. Usadas para separar ambientes lógicos uns dos outros. Análogo a um servidor de banco de dados. -- `Library` contém vários símbolos que são agrupados de uma certa maneira (diferentes usuários, mercados, etc.). Análogo a um banco de dados. Análogo a um banco de dados. +- `Library` contém vários símbolos que são agrupados de uma certa maneira (diferentes usuários, mercados, etc.). Análogo a um banco de dados. - `Symbol` Unidade atômica de armazenamento de dados. Identificado por um nome string. Dados armazenados sob um símbolo se assemelham muito a um DataFrame do pandas. Análogo a tabelas. - `Version` Cada ação de modificação (escrever, anexar, atualizar) realizada em um símbolo cria uma nova versão desse objeto. 
@@ -296,6 +296,7 @@ arctic = adb.Arctic("lmdb://arcticdb_test") ``` > **Observação:** o ArcticDB oferece suporte a qualquer armazenamento compatível com a API S3, incluindo AWS. O ArcticDB também oferece suporte ao armazenamento Azure Blob.\ +> O ArcticDB também oferece suporte ao armazenamento Azure Blob.\ > O ArcticDB também oferece suporte ao LMDB para armazenamento local/baseado em arquivo - para usar o LMDB, passe um caminho do LMDB como URI: adb.Arctic('lmdb://caminho/para/banco-de-dados/desejado'). #### Configuração da biblioteca From 888f88697d7cbbae91b9f4a6d9ddde96139242c3 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:06 -0500 Subject: [PATCH 121/184] New translations ecosystem.md (Persian) --- web/pandas/fa/community/ecosystem.md | 738 +++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 web/pandas/fa/community/ecosystem.md diff --git a/web/pandas/fa/community/ecosystem.md b/web/pandas/fa/community/ecosystem.md new file mode 100644 index 000000000..f8e721d83 --- /dev/null +++ b/web/pandas/fa/community/ecosystem.md @@ -0,0 +1,738 @@ +# Ecosystem + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. The creation +of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a community-maintained list of projects that build on pandas in order +to provide tools in the PyData space. The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library. + +For a more complete list of projects that depend on pandas, see the libraries.io usage page for +pandas or search pypi for +pandas. + +We'd like to make it easier for users to find these projects, if you +know of other substantial projects that you feel should be on this list, +please let us know. + +## Statistics and machine learning + +### [Statsmodels](https://www.statsmodels.org/) + +Statsmodels is the prominent Python "statistics and econometrics +library" and it has a long-standing special relationship with pandas. +Statsmodels provides powerful statistics, econometrics, analysis and +modeling functionality that is out of pandas' scope. Statsmodels +leverages pandas objects as the underlying data container for +computation. + +### [skrub](https://skrub-data.org) + +Skrub facilitates machine learning on dataframes. It bridges pandas +to scikit-learn and related. In particular it facilitates building +features from dataframes. + +### [Featuretools](https://github.com/alteryx/featuretools/) + +Featuretools is a Python library for automated feature engineering built +on top of pandas. It excels at transforming temporal and relational +datasets into feature matrices for machine learning using reusable +feature engineering "primitives". Users can contribute their own +primitives in Python and share them with the rest of the community. + +### [Compose](https://github.com/alteryx/compose) + +Compose is a machine learning tool for labeling data and prediction engineering. 
+It allows you to structure the labeling process by parameterizing +prediction problems and transforming time-driven relational data into +target values with cutoff times that can be used for supervised learning. + +### [STUMPY](https://github.com/TDAmeritrade/stumpy) + +STUMPY is a powerful and scalable Python library for modern time series analysis. +At its core, STUMPY efficiently computes something called a +[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html), +which can be used for a wide variety of time series data mining tasks. + +## Visualization + +### [Altair](https://altair-viz.github.io/) + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with Pandas DataFrames. + +### [Bokeh](https://docs.bokeh.org) + +Bokeh is a Python interactive visualization library for large datasets +that natively uses the latest web technologies. Its goal is to provide +elegant, concise construction of novel graphics in the style of +Protovis/D3, while delivering high-performance interactivity over large +data to thin clients. + +[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a +high level API for Bokeh that can be loaded as a native Pandas plotting +backend via + +``` +pd.set_option("plotting.backend", "pandas_bokeh") +``` + +It is very similar to the matplotlib plotting backend, but provides +interactive web-based charts and maps. + +### [pygwalker](https://github.com/Kanaries/pygwalker) + +PyGWalker is an interactive data visualization and +exploratory data analysis tool built upon Graphic Walker +with support for visualization, cleaning, and annotation workflows. + +pygwalker can save interactively created charts +to Graphic-Walker and Vega-Lite JSON. + +``` +import pygwalker as pyg +pyg.walk(df) +``` + +### [seaborn](https://seaborn.pydata.org) + +Seaborn is a Python visualization library based on +[matplotlib](https://matplotlib.org). It provides a high-level, +dataset-oriented interface for creating attractive statistical graphics. +The plotting functions in seaborn understand pandas objects and leverage +pandas grouping operations internally to support concise specification +of complex visualizations. Seaborn also goes beyond matplotlib and +pandas with the option to perform statistical estimation while plotting, +aggregating across observations and visualizing the fit of statistical +models to emphasize patterns in a dataset. + +``` +import seaborn as sns +sns.set_theme() +``` + +### [plotnine](https://github.com/has2k1/plotnine/) + +Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a +foundational exploratory visualization package for the R language. Based +on "The Grammar of +Graphics" +it provides a powerful, declarative and extremely general way to +generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/). + +### [IPython Vega](https://github.com/vega/ipyvega) + +[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook. 
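+
+As a minimal sketch of the grammar-of-graphics style that the plotnine section above describes (the data here is illustrative):
+
+```python
+import pandas as pd
+from plotnine import aes, geom_point, ggplot
+
+df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [1, 4, 9, 16]})
+
+# data + aesthetic mapping + geometry
+plot = ggplot(df, aes(x="x", y="y")) + geom_point()
+plot.save("scatter.png")  # or display `plot` directly in a notebook
+```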
+ +### [Plotly](https://plot.ly/python) + +[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/) +enables interactive figures and web shareability. Maps, 2D, 3D, and +live-streaming graphs are rendered with WebGL and +[D3.js](https://d3js.org/). The library supports plotting directly from +a pandas DataFrame and cloud-based collaboration. Users of matplotlib, +ggplot for Python, and +Seaborn can +convert figures into interactive web-based plots. Plots can be drawn in +[IPython Notebooks](https://plot.ly/ipython-notebooks/) , edited with R +or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly +is free for unlimited sharing, and has cloud, offline, or on-premise +accounts for private use. + +### [Lux](https://github.com/lux-org/lux) + +Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas: + +```python +import lux +import pandas as pd + +df = pd.read_csv("data.csv") +df # discover interesting insights! +``` + +By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allow users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code. + +### [D-Tale](https://github.com/man-group/dtale) + +D-Tale is a lightweight web client for visualizing pandas data structures. It +provides a rich spreadsheet-style grid which acts as a wrapper for a lot of +pandas functionality (query, sort, describe, corr...) so users can quickly +manipulate their data. There is also an interactive chart-builder using Plotly +Dash allowing users to build nice portable visualizations. D-Tale can be +invoked with the following command + +```python +import dtale + +dtale.show(df) +``` + +D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle +& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1). + +### [hvplot](https://hvplot.holoviz.org/index.html) + +hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/). +It can be loaded as a native pandas plotting backend via + +```python +pd.set_option("plotting.backend", "hvplot") +``` + +## IDE + +### [IPython](https://ipython.org/documentation.html) + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with Pandas methods and also +attributes like DataFrame columns. + +### [Jupyter Notebook / Jupyter Lab](https://jupyter.org) + +Jupyter Notebook is a web application for creating Jupyter notebooks. A +Jupyter notebook is a JSON document containing an ordered list of +input/output cells which can contain code, text, mathematics, plots and +rich media. Jupyter notebooks can be converted to a number of open +standard output formats (HTML, HTML presentation slides, LaTeX, PDF, +ReStructuredText, Markdown, Python) through 'Download As' in the web +interface and `jupyter convert` in a shell. 
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
+
+See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html)
+for pandas `display.` settings.
+
+### [Spyder](https://www.spyder-ide.org/)
+
+Spyder is a cross-platform PyQt-based IDE combining the editing,
+analysis, debugging and profiling functionality of a software
+development tool with the data exploration, interactive execution, deep
+inspection and rich visualization capabilities of a scientific
+environment like MATLAB or RStudio.
+
+Its Variable Explorer allows users to view, manipulate and edit pandas
+`Index`, `Series`, and `DataFrame` objects like a "spreadsheet",
+including copying and modifying values, sorting, displaying a
+"heatmap", converting data types and more. Pandas objects can also be
+renamed, duplicated, new columns added, copied/pasted to/from the
+clipboard (as TSV), and saved/loaded to/from a file. Spyder can also
+import data from a variety of plain text and binary files or the
+clipboard into a new pandas DataFrame via a sophisticated import wizard.
+
+Most pandas classes, methods and data attributes can be autocompleted in
+Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython
+Console, and Spyder's
+[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and
+render Numpydoc documentation on pandas objects in rich text with Sphinx
+both automatically and on-demand.
+
+### [marimo](https://marimo.io)
+
+marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun:
+
+1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities.
+2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes.
+3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook.
+4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns.
+5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively.
+6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory.
+
+## API
+
+### [pandas-datareader](https://github.com/pydata/pandas-datareader)
+
+`pandas-datareader` is a remote data access library for pandas
+(PyPI:`pandas-datareader`). It is based on functionality that was
+located in `pandas.io.data` and `pandas.io.wb` but was split off in
+v0.19. See more in the pandas-datareader docs.
+
+The following data feeds are available:
+
+- Google Finance
+- Tiingo
+- Morningstar
+- IEX
+- Robinhood
+- Enigma
+- Quandl
+- FRED
+- Fama/French
+- World Bank
+- OECD
+- Eurostat
+- TSP Fund Data
+- Nasdaq Trader Symbol Definitions
+- Stooq Index Data
+- MOEX Data
+
+### [pandaSDMX](https://pandasdmx.readthedocs.io)
+
+pandaSDMX is a library to retrieve statistical data and
+metadata disseminated in [SDMX](https://sdmx.org) 2.1, an
+ISO standard widely used by institutions such as statistics offices,
+central banks, and international organisations. pandaSDMX can expose
+datasets and related structural metadata including data flows,
+code-lists, and data structure definitions as pandas Series or
+MultiIndexed DataFrames.
+
+### [fredapi](https://github.com/mortada/fredapi)
+
+fredapi is a Python interface to the Federal Reserve Economic Data
+(FRED) provided by the Federal Reserve
+Bank of St. Louis. It works with both the FRED database and the ALFRED
+database, which contains point-in-time data (i.e. historic data
+revisions). fredapi provides a wrapper in Python to the FRED HTTP API,
+and also provides several convenient methods for parsing and analyzing
+point-in-time data from ALFRED. fredapi makes use of pandas and returns
+data in a Series or DataFrame. This module requires a FRED API key that
+you can obtain for free on the FRED website.
+
+## Domain specific
+
+### [Geopandas](https://github.com/geopandas/geopandas)
+
+Geopandas extends pandas data objects to include geographic information
+which supports geometric operations. If your work entails maps and
+geographical coordinates, and you love pandas, you should take a close
+look at Geopandas.
+
+### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)
+
+gurobipy-pandas provides a convenient accessor API to connect pandas with
+gurobipy. It enables users to more easily and efficiently build mathematical
+optimization models from data stored in DataFrames and Series, and to read
+solutions back directly as pandas objects.
+
+### [staircase](https://github.com/staircase-dev/staircase)
+
+staircase is a data analysis package, built upon pandas and numpy, for modelling and
+manipulation of mathematical step functions. It provides a rich variety of arithmetic
+operations, relational operations, logical operations, statistical operations and
+aggregations for step functions defined over real numbers, datetime and timedelta domains.
+
+### [xarray](https://github.com/pydata/xarray)
+
+xarray brings the labeled data power of pandas to the physical sciences
+by providing N-dimensional variants of the core pandas data structures.
+It aims to provide a pandas-like and pandas-compatible toolkit for
+analytics on multi-dimensional arrays, rather than the tabular data for
+which pandas excels.
+
+## IO
+
+### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas)
+
+NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly.
+
+It supports the following data types:
+
+- pandas data types
+- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm)
+- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/)
+
+The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema).
+ +Example: + +```python +import ntv_pandas as npd + +jsn = df.npd.to_json(table=False) # save df as a JSON-value (format Table Schema if table is True else format NTV ) +df = npd.read_json(jsn) # load a JSON-value as a `DataFrame` + +df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not +``` + +### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas) + +BCPandas provides high performance writes from pandas to Microsoft SQL Server, +far exceeding the performance of the native `df.to_sql` method. Internally, it uses +Microsoft's BCP utility, but the complexity is fully abstracted away from the end user. +Rigorously tested, it is a complete replacement for `df.to_sql`. + +### [Deltalake](https://pypi.org/project/deltalake) + +Deltalake python package lets you access tables stored in +[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or +JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert +any Delta table into Pandas dataframe. + +### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas) + +pandas-gbq provides high performance reads and writes to and from +[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0), +these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`. +Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq`, instead. + +### [ArcticDB](https://github.com/man-group/ArcticDB) + +ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/). + +#### ArcticDB Terminology + +ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components: + +- `Object Store` Collections of libraries. Used to separate logical environments from each other. Analogous to a database server. +- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database. +- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables. +- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object. + +#### Installation + +To install, simply run: + +```console +pip install arcticdb +``` + +To get started, we can import ArcticDB and instantiate it: + +```python +import arcticdb as adb +import numpy as np +import pandas as pd +# this will set up the storage using the local file system +arctic = adb.Arctic("lmdb://arcticdb_test") +``` + +> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\ +> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`. + +#### Library Setup + +ArcticDB is geared towards storing many (potentially millions) of tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. 
Libraries must first be initialized prior to use: + +```python +lib = arctic.get_library('sample', create_if_missing=True) +``` + +#### Writing Data to ArcticDB + +Now we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage. + +```python +df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3) + } +) + +df +df.dtypes +``` + +Write to ArcticDB. + +```python +write_record = lib.write("test", df) +``` + +> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types: +> +> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index) +> - `RangeIndex` +> - `DatetimeIndex` +> - `MultiIndex` composed of above supported types +> +> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc'). + +#### Reading Data from ArcticDB + +Read the data back from storage: + +```python +read_record = lib.read("test") +read_record.data +df.dtypes +``` + +ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder). + +### [Hugging Face](https://huggingface.co/datasets) + +The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library. + +You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`. + +For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb): + +```python +import pandas as pd + +# Load the IMDB dataset +df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet") +``` + +Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas. + +To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`: + +```python +# Save the dataset to my Hugging Face account +df.to_parquet("hf://datasets/username/dataset_name/train.parquet") +``` + +You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets). + +## Out-of-core + +### [Bodo](https://github.com/bodo-ai/Bodo) + +Bodo is a high-performance compute engine for Python data processing. +Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas +workloads from laptops to clusters without major code changes. +Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology—making it +both easier to use and often much faster than alternatives. +Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently. 
+
+```python
+import pandas as pd
+import bodo
+
+@bodo.jit
+def process_data():
+    df = pd.read_parquet("my_data.pq")
+    df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)})
+    df2.to_parquet("out.pq")
+
+process_data()
+```
+
+### [Cylon](https://cylondata.org/)
+
+Cylon is a fast, scalable, distributed memory parallel runtime with a
+pandas-like Python DataFrame API. "Core Cylon" is implemented with C++ using Apache
+Arrow format to represent the data in-memory. Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can be run on multiple engines. It helps bridge the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multi-cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv")  # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for Out-of-Core DataFrames (similar to Pandas), to
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count, standard deviation etc., on an N-dimensional
+grid at up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a zero
+memory copy policy and lazy computations for best performance (no memory
+wasted).
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
+
+### [Hail Query](https://hail.is/)
+
+An out-of-core, preemptible-safe, distributed, dataframe library serving
+the genetics community. Hail Query ships with on-disk data formats,
+in-memory data formats, an expression compiler, a query planner, and a
+distributed sort algorithm, all designed to accelerate queries on large
+matrices of genome sequencing data.
+
+It is often easiest to use pandas to manipulate the summary statistics or
+other small aggregates produced by Hail. For this reason, Hail provides
+native import to and export from pandas DataFrames:
+
+- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas)
+- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas)
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
+
+### [Pandera](https://pandera.readthedocs.io/en/stable/)
+
+Pandera provides a flexible and expressive API for performing data validation on dataframes
+to make data processing pipelines more readable and robust.
+Dataframes contain information that pandera explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings.
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system.
+The following libraries implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [awkward-pandas](https://github.com/scikit-hep/awkward)
+
+Awkward-pandas provides an extension type for storing Awkward
+Arrays inside pandas' Series and
+DataFrame. It also provides an accessor for using awkward functions
+on Series that are of awkward type.
+
+### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
+
+db-dtypes provides extension types for working with types like
+DATE, TIME, and JSON from database systems. This package is used
+by pandas-gbq to provide natural dtypes for BigQuery data types without
+a natural numpy type.
+
+### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
+
+Pandas-Genomics provides an extension type and extension array for working
+with genomics data. It also includes `genomics` accessors for many useful properties
+and methods related to QC and analysis of genomics data.
+
+### [Physipandas](https://github.com/mocquin/physipandas)
+
+Physipandas provides an extension for manipulating physical quantities
+(like scalars and numpy.ndarray) in association with a physical unit
+(like meter or joule) and additional features for integration of
+`physipy` accessors with pandas Series and DataFrame.
+
+### [Pint-Pandas](https://github.com/hgrecco/pint-pandas)
+
+Pint-Pandas provides an extension type for storing numeric arrays with units.
+These arrays can be stored inside pandas' Series and DataFrame. Operations
+between Series and DataFrame columns which use pint's extension array are then
+units aware.
+
+### [Text Extensions](https://ibm.biz/text-extensions-for-pandas)
+
+Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames.
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace.
+
+| Library                                                              | Accessor   | Classes               |
+| -------------------------------------------------------------------- | ---------- | --------------------- |
+| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/)   | `ak`       | `Series`              |
+| [pdvega](https://altair-viz.github.io/pdvega/)                       | `vgplot`   | `Series`, `DataFrame` |
+| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` |
+| [pint-pandas](https://github.com/hgrecco/pint-pandas)                | `pint`     | `Series`, `DataFrame` |
+| [physipandas](https://github.com/mocquin/physipandas)                | `physipy`  | `Series`, `DataFrame` |
+| [composeml](https://github.com/alteryx/compose)                      | `slice`    | `DataFrame`           |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)         | `gppd`     | `Series`, `DataFrame` |
+| [staircase](https://www.staircase.dev/)                              | `sc`       | `Series`, `DataFrame` |
+| [woodwork](https://github.com/alteryx/woodwork)                      | `ww`       | `Series`, `DataFrame` |
+
+## Development tools
+
+### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs)
+
+While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of the pandas API.
+
+Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468),
+[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142).
+
+See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs).
+
+### [Hamilton](https://github.com/dagworks-inc/hamilton)
+
+Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a
+Pandas code base, specifically with respect to feature engineering for machine learning models.
+
+It prescribes an opinionated paradigm that ensures all code is:
+
+- unit testable
+- integration-testing friendly
+- documentation friendly
+- reusable: transformation logic is decoupled from the context in which it is used
+- integratable with runtime data quality checks
+
+This helps you scale your pandas code base while keeping maintenance costs low.
+
+For more information, see [documentation](https://hamilton.readthedocs.io/).
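+
+As a flavor of the paradigm, here is a minimal sketch (assuming Hamilton's
+classic `driver.Driver` API; the module, function, and column names are
+illustrative, not Hamilton's own examples):
+
+```python
+# my_features.py -- each function is a node; parameter names wire the dataflow
+import pandas as pd
+
+def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:
+    """Derived feature computed from two input columns."""
+    return spend / signups
+```
+
+```python
+# run.py -- build a driver over the module and request the outputs you need
+import pandas as pd
+from hamilton import driver
+
+import my_features
+
+dr = driver.Driver({}, my_features)
+df = dr.execute(
+    ["spend_per_signup"],
+    inputs={
+        "spend": pd.Series([10.0, 20.0]),
+        "signups": pd.Series([1.0, 4.0]),
+    },
+)
+```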
From 1c7e0b462def9fefd65ca158105648bfe84187d2 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:07 -0500 Subject: [PATCH 122/184] New translations ecosystem.md (Tamil) --- web/pandas/ta/community/ecosystem.md | 738 +++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 web/pandas/ta/community/ecosystem.md diff --git a/web/pandas/ta/community/ecosystem.md b/web/pandas/ta/community/ecosystem.md new file mode 100644 index 000000000..f8e721d83 --- /dev/null +++ b/web/pandas/ta/community/ecosystem.md @@ -0,0 +1,738 @@ +# Ecosystem + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. The creation +of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a community-maintained list of projects that build on pandas in order +to provide tools in the PyData space. The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library. + +For a more complete list of projects that depend on pandas, see the libraries.io usage page for +pandas or search pypi for +pandas. + +We'd like to make it easier for users to find these projects, if you +know of other substantial projects that you feel should be on this list, +please let us know. + +## Statistics and machine learning + +### [Statsmodels](https://www.statsmodels.org/) + +Statsmodels is the prominent Python "statistics and econometrics +library" and it has a long-standing special relationship with pandas. +Statsmodels provides powerful statistics, econometrics, analysis and +modeling functionality that is out of pandas' scope. Statsmodels +leverages pandas objects as the underlying data container for +computation. + +### [skrub](https://skrub-data.org) + +Skrub facilitates machine learning on dataframes. It bridges pandas +to scikit-learn and related. In particular it facilitates building +features from dataframes. + +### [Featuretools](https://github.com/alteryx/featuretools/) + +Featuretools is a Python library for automated feature engineering built +on top of pandas. It excels at transforming temporal and relational +datasets into feature matrices for machine learning using reusable +feature engineering "primitives". Users can contribute their own +primitives in Python and share them with the rest of the community. + +### [Compose](https://github.com/alteryx/compose) + +Compose is a machine learning tool for labeling data and prediction engineering. +It allows you to structure the labeling process by parameterizing +prediction problems and transforming time-driven relational data into +target values with cutoff times that can be used for supervised learning. + +### [STUMPY](https://github.com/TDAmeritrade/stumpy) + +STUMPY is a powerful and scalable Python library for modern time series analysis. +At its core, STUMPY efficiently computes something called a +[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html), +which can be used for a wide variety of time series data mining tasks. 
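+
+For instance, computing a matrix profile over a time series might look like
+this (a minimal sketch; the window size and data are illustrative):
+
+```python
+import numpy as np
+import stumpy
+
+ts = np.random.default_rng(0).random(1_000)  # a univariate time series
+mp = stumpy.stump(ts, m=50)  # matrix profile with a window of 50 points
+```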
+ +## Visualization + +### [Altair](https://altair-viz.github.io/) + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with Pandas DataFrames. + +### [Bokeh](https://docs.bokeh.org) + +Bokeh is a Python interactive visualization library for large datasets +that natively uses the latest web technologies. Its goal is to provide +elegant, concise construction of novel graphics in the style of +Protovis/D3, while delivering high-performance interactivity over large +data to thin clients. + +[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a +high level API for Bokeh that can be loaded as a native Pandas plotting +backend via + +``` +pd.set_option("plotting.backend", "pandas_bokeh") +``` + +It is very similar to the matplotlib plotting backend, but provides +interactive web-based charts and maps. + +### [pygwalker](https://github.com/Kanaries/pygwalker) + +PyGWalker is an interactive data visualization and +exploratory data analysis tool built upon Graphic Walker +with support for visualization, cleaning, and annotation workflows. + +pygwalker can save interactively created charts +to Graphic-Walker and Vega-Lite JSON. + +``` +import pygwalker as pyg +pyg.walk(df) +``` + +### [seaborn](https://seaborn.pydata.org) + +Seaborn is a Python visualization library based on +[matplotlib](https://matplotlib.org). It provides a high-level, +dataset-oriented interface for creating attractive statistical graphics. +The plotting functions in seaborn understand pandas objects and leverage +pandas grouping operations internally to support concise specification +of complex visualizations. Seaborn also goes beyond matplotlib and +pandas with the option to perform statistical estimation while plotting, +aggregating across observations and visualizing the fit of statistical +models to emphasize patterns in a dataset. + +``` +import seaborn as sns +sns.set_theme() +``` + +### [plotnine](https://github.com/has2k1/plotnine/) + +Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a +foundational exploratory visualization package for the R language. Based +on "The Grammar of +Graphics" +it provides a powerful, declarative and extremely general way to +generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/). + +### [IPython Vega](https://github.com/vega/ipyvega) + +[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook. + +### [Plotly](https://plot.ly/python) + +[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/) +enables interactive figures and web shareability. Maps, 2D, 3D, and +live-streaming graphs are rendered with WebGL and +[D3.js](https://d3js.org/). The library supports plotting directly from +a pandas DataFrame and cloud-based collaboration. Users of matplotlib, +ggplot for Python, and +Seaborn can +convert figures into interactive web-based plots. Plots can be drawn in +[IPython Notebooks](https://plot.ly/ipython-notebooks/) , edited with R +or MATLAB, modified in a GUI, or embedded in apps and dashboards. 
Plotly
+is free for unlimited sharing, and has cloud, offline, or on-premise
+accounts for private use.
+
+### [Lux](https://github.com/lux-org/lux)
+
+Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas:
+
+```python
+import lux
+import pandas as pd
+
+df = pd.read_csv("data.csv")
+df # discover interesting insights!
+```
+
+By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allows users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code.
+
+### [D-Tale](https://github.com/man-group/dtale)
+
+D-Tale is a lightweight web client for visualizing pandas data structures. It
+provides a rich spreadsheet-style grid which acts as a wrapper for a lot of
+pandas functionality (query, sort, describe, corr...) so users can quickly
+manipulate their data. There is also an interactive chart-builder using Plotly
+Dash allowing users to build nice portable visualizations. D-Tale can be
+invoked with the following command:
+
+```python
+import dtale
+
+dtale.show(df)
+```
+
+D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle
+& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1).
+
+### [hvplot](https://hvplot.holoviz.org/index.html)
+
+hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/).
+It can be loaded as a native pandas plotting backend via
+
+```python
+pd.set_option("plotting.backend", "hvplot")
+```
+
+## IDE
+
+### [IPython](https://ipython.org/documentation.html)
+
+IPython is an interactive command shell and distributed computing
+environment. IPython tab completion works with pandas methods and also
+with attributes such as DataFrame columns.
+
+### [Jupyter Notebook / Jupyter Lab](https://jupyter.org)
+
+Jupyter Notebook is a web application for creating Jupyter notebooks. A
+Jupyter notebook is a JSON document containing an ordered list of
+input/output cells which can contain code, text, mathematics, plots and
+rich media. Jupyter notebooks can be converted to a number of open
+standard output formats (HTML, HTML presentation slides, LaTeX, PDF,
+ReStructuredText, Markdown, Python) through 'Download As' in the web
+interface and `jupyter nbconvert` in a shell.
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
+
+See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html)
+for pandas `display.` settings.
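+
+For example, a couple of common settings that shape notebook output (a
+minimal sketch; the values are illustrative):
+
+```python
+import pandas as pd
+
+pd.set_option("display.max_rows", 20)  # truncate long frames in cell output
+pd.set_option("display.precision", 3)  # fewer digits in the HTML repr
+```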
+ +### [Spyder](https://www.spyder-ide.org/) + +Spyder is a cross-platform PyQt-based IDE combining the editing, +analysis, debugging and profiling functionality of a software +development tool with the data exploration, interactive execution, deep +inspection and rich visualization capabilities of a scientific +environment like MATLAB or Rstudio. + +Its Variable +Explorer allows +users to view, manipulate and edit pandas `Index`, `Series`, and +`DataFrame` objects like a "spreadsheet", including copying and +modifying values, sorting, displaying a "heatmap", converting data +types and more. Pandas objects can also be renamed, duplicated, new +columns added, copied/pasted to/from the clipboard (as TSV), and +saved/loaded to/from a file. Spyder can also import data from a variety +of plain text and binary files or the clipboard into a new pandas +DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython +Console, and Spyder's +[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and +render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. + +### [marimo](https://marimo.io) + +marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun: + +1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities. +2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes. +3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook. +4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns. +5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively. +6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory. + +## API + +### [pandas-datareader](https://github.com/pydata/pandas-datareader) + +`pandas-datareader` is a remote data access library for pandas +(PyPI:`pandas-datareader`). It is based on functionality that was +located in `pandas.io.data` and `pandas.io.wb` but was split off in +v0.19. See more in the pandas-datareader +docs: + +The following data feeds are available: + +- Google Finance +- Tiingo +- Morningstar +- IEX +- Robinhood +- Enigma +- Quandl +- FRED +- Fama/French +- World Bank +- OECD +- Eurostat +- TSP Fund Data +- Nasdaq Trader Symbol Definitions +- Stooq Index Data +- MOEX Data + +### [pandaSDMX](https://pandasdmx.readthedocs.io) + +pandaSDMX is a library to retrieve and acquire statistical data and +metadata disseminated in [SDMX](https://sdmx.org) 2.1, an +ISO-standard widely used by institutions such as statistics offices, +central banks, and international organisations. pandaSDMX can expose +datasets and related structural metadata including data flows, +code-lists, and data structure definitions as pandas Series or +MultiIndexed DataFrames. 
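+
+A minimal sketch of fetching a dataset (assuming pandaSDMX's `Request` and
+`to_pandas` API; the source and dataflow IDs are illustrative):
+
+```python
+import pandasdmx as sdmx
+
+estat = sdmx.Request("ESTAT")  # Eurostat SDMX web service
+msg = estat.data("une_rt_a", key={"GEO": "EL+ES"})  # annual unemployment rates
+df = sdmx.to_pandas(msg)  # pandas Series/DataFrame with a MultiIndex
+```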
+ +### [fredapi](https://github.com/mortada/fredapi) + +fredapi is a Python interface to the Federal Reserve Economic Data +(FRED) provided by the Federal Reserve +Bank of St. Louis. It works with both the FRED database and ALFRED +database that contains point-in-time data (i.e. historic data +revisions). fredapi provides a wrapper in Python to the FRED HTTP API, +and also provides several convenient methods for parsing and analyzing +point-in-time data from ALFRED. fredapi makes use of pandas and returns +data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. + +## Domain specific + +### [Geopandas](https://github.com/geopandas/geopandas) + +Geopandas extends pandas data objects to include geographic information +which support geometric operations. If your work entails maps and +geographical coordinates, and you love pandas, you should take a close +look at Geopandas. + +### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) + +gurobipy-pandas provides a convenient accessor API to connect pandas with +gurobipy. It enables users to more easily and efficiently build mathematical +optimization models from data stored in DataFrames and Series, and to read +solutions back directly as pandas objects. + +### [staircase](https://github.com/staircase-dev/staircase) + +staircase is a data analysis package, built upon pandas and numpy, for modelling and +manipulation of mathematical step functions. It provides a rich variety of arithmetic +operations, relational operations, logical operations, statistical operations and +aggregations for step functions defined over real numbers, datetime and timedelta domains. + +### [xarray](https://github.com/pydata/xarray) + +xarray brings the labeled data power of pandas to the physical sciences +by providing N-dimensional variants of the core pandas data structures. +It aims to provide a pandas-like and pandas-compatible toolkit for +analytics on multi-dimensional arrays, rather than the tabular data for +which pandas excels. + +## IO + +### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas) + +NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly. + +It supports the following data types: + +- pandas data types +- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm) +- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/) + +The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema). + +Example: + +```python +import ntv_pandas as npd + +jsn = df.npd.to_json(table=False) # save df as a JSON-value (format Table Schema if table is True else format NTV ) +df = npd.read_json(jsn) # load a JSON-value as a `DataFrame` + +df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not +``` + +### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas) + +BCPandas provides high performance writes from pandas to Microsoft SQL Server, +far exceeding the performance of the native `df.to_sql` method. Internally, it uses +Microsoft's BCP utility, but the complexity is fully abstracted away from the end user. +Rigorously tested, it is a complete replacement for `df.to_sql`. 
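+
+A minimal sketch of the write path (assuming BCPandas' `SqlCreds`/`to_sql`
+API; server, database, and credentials are placeholders):
+
+```python
+import pandas as pd
+from bcpandas import SqlCreds, to_sql
+
+creds = SqlCreds("my_server", "my_db", "my_user", "my_password")
+df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+to_sql(df, "my_table", creds, index=False, if_exists="replace")  # bulk copy via BCP
+```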
+ +### [Deltalake](https://pypi.org/project/deltalake) + +Deltalake python package lets you access tables stored in +[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or +JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert +any Delta table into Pandas dataframe. + +### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas) + +pandas-gbq provides high performance reads and writes to and from +[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0), +these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`. +Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq`, instead. + +### [ArcticDB](https://github.com/man-group/ArcticDB) + +ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB. ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/). + +#### ArcticDB Terminology + +ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components: + +- `Object Store` Collections of libraries. Used to separate logical environments from each other. Analogous to a database server. +- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database. +- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables. +- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object. + +#### Installation + +To install, simply run: + +```console +pip install arcticdb +``` + +To get started, we can import ArcticDB and instantiate it: + +```python +import arcticdb as adb +import numpy as np +import pandas as pd +# this will set up the storage using the local file system +arctic = adb.Arctic("lmdb://arcticdb_test") +``` + +> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\ +> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`. + +#### Library Setup + +ArcticDB is geared towards storing many (potentially millions) of tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must first be initialized prior to use: + +```python +lib = arctic.get_library('sample', create_if_missing=True) +``` + +#### Writing Data to ArcticDB + +Now we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage. + +```python +df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3) + } +) + +df +df.dtypes +``` + +Write to ArcticDB. 
+ +```python +write_record = lib.write("test", df) +``` + +> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types: +> +> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index) +> - `RangeIndex` +> - `DatetimeIndex` +> - `MultiIndex` composed of above supported types +> +> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc'). + +#### Reading Data from ArcticDB + +Read the data back from storage: + +```python +read_record = lib.read("test") +read_record.data +df.dtypes +``` + +ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder). + +### [Hugging Face](https://huggingface.co/datasets) + +The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community. The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library. + +You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`. + +For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb): + +```python +import pandas as pd + +# Load the IMDB dataset +df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet") +``` + +Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas. + +To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`: + +```python +# Save the dataset to my Hugging Face account +df.to_parquet("hf://datasets/username/dataset_name/train.parquet") +``` + +You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets). + +## Out-of-core + +### [Bodo](https://github.com/bodo-ai/Bodo) + +Bodo is a high-performance compute engine for Python data processing. +Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas +workloads from laptops to clusters without major code changes. +Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology—making it +both easier to use and often much faster than alternatives. +Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently. + +```python +import pandas as pd +import bodo + +@bodo.jit +def process_data(): + df = pd.read_parquet("my_data.pq") + df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)}) + df2.to_parquet("out.pq") + +process_data() +``` + +### [Cylon](https://cylondata.org/) + +Cylon is a fast, scalable, distributed memory parallel runtime with a pandas +like Python DataFrame API. ”Core Cylon” is implemented with C++ using Apache +Arrow format to represent the data in-memory. Cylon DataFrame API implements +most of the core operators of pandas such as merge, filter, join, concat, +group-by, drop_duplicates, etc. 
These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can be run on multiple engines. It helps bridge the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multi-cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv")  # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for Out-of-Core DataFrames (similar to Pandas), to
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count, standard deviation etc., on an N-dimensional
+grid at up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a zero
+memory copy policy and lazy computations for best performance (no memory
+wasted).
+ +- `vaex.from_pandas` +- `vaex.to_pandas_df` + +### [Hail Query](https://hail.is/) + +An out-of-core, preemptible-safe, distributed, dataframe library serving +the genetics community. Hail Query ships with on-disk data formats, +in-memory data formats, an expression compiler, a query planner, and a +distributed sort algorithm all designed to accelerate queries on large +matrices of genome sequencing data. + +It is often easiest to use pandas to manipulate the summary statistics or +other small aggregates produced by Hail. For this reason, Hail provides +native import to and export from pandas DataFrames: + +- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas) +- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas) + +## Data cleaning and validation + +### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor) + +Pyjanitor provides a clean API for cleaning data, using method chaining. + +### [Pandera](https://pandera.readthedocs.io/en/stable/) + +Pandera provides a flexible and expressive API for performing data validation on dataframes +to make data processing pipelines more readable and robust. +Dataframes contain information that pandera explicitly validates at runtime. This is useful in +production-critical data pipelines or reproducible research settings. + +## Extension data types + +Pandas provides an interface for defining +[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system. +The following libraries implement that interface to provide types not found in NumPy or pandas, +which work well with pandas' data containers. + +### [awkward-pandas](https://github.com/scikit-hep/awkward) + +Awkward-pandas provides an extension type for storing Awkward +Arrays inside pandas' Series and +DataFrame. It also provides an accessor for using awkward functions +on Series that are of awkward type. + +### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas) + +db-dtypes provides an extension types for working with types like +DATE, TIME, and JSON from database systems. This package is used +by pandas-gbq to provide natural dtypes for BigQuery data types without +a natural numpy type. + +### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/) + +Pandas-Genomics provides an extension type and extension array for working +with genomics data. It also includes `genomics` accessors for many useful properties +and methods related to QC and analysis of genomics data. + +### [Physipandas](https://github.com/mocquin/physipandas) + +Physipandas provides an extension for manipulating physical quantities +(like scalar and numpy.ndarray) in association with a physical unit +(like meter or joule) and additional features for integration of +`physipy` accessors with pandas Series and Dataframe. + +### [Pint-Pandas](https://github.com/hgrecco/pint-pandas) + +Pint-Pandas provides an extension type for storing numeric arrays with units. +These arrays can be stored inside pandas' Series and DataFrame. Operations +between Series and DataFrame columns which use pint's extension array are then +units aware. + +### [Text Extensions](https://ibm.biz/text-extensions-for-pandas) + +Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames. 
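+
+To illustrate how dtypes from this section behave in practice, here is a small
+Pint-Pandas sketch (the units and values are illustrative):
+
+```python
+import pandas as pd
+import pint_pandas  # noqa: F401 -- registers the "pint[...]" extension dtype
+
+distance = pd.Series([1.0, 2.0, 3.0], dtype="pint[meter]")
+elapsed = pd.Series([2.0, 4.0, 6.0], dtype="pint[second]")
+speed = distance / elapsed  # units propagate: meter / second
+```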
+ +## Accessors + +A directory of projects providing +[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). +This is for users to discover new accessors and for library +authors to coordinate on the namespace. + +| Library | Accessor | Classes | +| -------------------------------------------------------------------- | ---------- | --------------------- | +| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/) | `ak` | `Series` | +| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | +| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` | +| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | +| [physipandas](https://github.com/mocquin/physipandas) | `physipy` | `Series`, `DataFrame` | +| [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` | +| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` | +| [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` | +| [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` | + +## Development tools + +### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs) + +While pandas repository is partially typed, the package itself doesn't expose this information for external use. +Install pandas-stubs to enable basic type coverage of pandas API. + +Learn more by reading through these issues [14468](https://github.com/pandas-dev/pandas/issues/14468), +[26766](https://github.com/pandas-dev/pandas/issues/26766), [28142](https://github.com/pandas-dev/pandas/issues/28142). + +See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs). + +### [Hamilton](https://github.com/dagworks-inc/hamilton) + +Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a +Pandas code base, specifically with respect to feature engineering for machine learning models. + +It prescribes an opinionated paradigm, that ensures all code is: + +- unit testable +- integration testing friendly +- documentation friendly +- transformation logic is reusable, as it is decoupled from the context of where it is used. +- integratable with runtime data quality checks. + +This helps one to scale your pandas code base, at the same time, keeping maintenance costs low. + +For more information, see [documentation](https://hamilton.readthedocs.io/). From a3e5d5912928bb8ece9fd567f5d6b1055191fa51 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:09 -0500 Subject: [PATCH 123/184] New translations ecosystem.md (Hindi) --- web/pandas/hi/community/ecosystem.md | 738 +++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 web/pandas/hi/community/ecosystem.md diff --git a/web/pandas/hi/community/ecosystem.md b/web/pandas/hi/community/ecosystem.md new file mode 100644 index 000000000..f8e721d83 --- /dev/null +++ b/web/pandas/hi/community/ecosystem.md @@ -0,0 +1,738 @@ +# Ecosystem + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. 
The creation
+of libraries that complement pandas' functionality also allows pandas
+development to remain focused around its original requirements.
+
+This is a community-maintained list of projects that build on pandas in order
+to provide tools in the PyData space. The pandas core development team does not necessarily endorse any particular project on this list or have any knowledge of the maintenance status of any particular library.
+
+For a more complete list of projects that depend on pandas, see the libraries.io usage page for
+pandas or search PyPI for
+pandas.
+
+We'd like to make it easier for users to find these projects. If you
+know of other substantial projects that you feel should be on this list,
+please let us know.
+
+## Statistics and machine learning
+
+### [Statsmodels](https://www.statsmodels.org/)
+
+Statsmodels is the prominent Python "statistics and econometrics
+library" and it has a long-standing special relationship with pandas.
+Statsmodels provides powerful statistics, econometrics, analysis and
+modeling functionality that is out of pandas' scope. Statsmodels
+leverages pandas objects as the underlying data container for
+computation.
+
+### [skrub](https://skrub-data.org)
+
+Skrub facilitates machine learning on dataframes. It bridges pandas
+to scikit-learn and related libraries. In particular, it facilitates building
+features from dataframes.
+
+### [Featuretools](https://github.com/alteryx/featuretools/)
+
+Featuretools is a Python library for automated feature engineering built
+on top of pandas. It excels at transforming temporal and relational
+datasets into feature matrices for machine learning using reusable
+feature engineering "primitives". Users can contribute their own
+primitives in Python and share them with the rest of the community.
+
+### [Compose](https://github.com/alteryx/compose)
+
+Compose is a machine learning tool for labeling data and prediction engineering.
+It allows you to structure the labeling process by parameterizing
+prediction problems and transforming time-driven relational data into
+target values with cutoff times that can be used for supervised learning.
+
+### [STUMPY](https://github.com/TDAmeritrade/stumpy)
+
+STUMPY is a powerful and scalable Python library for modern time series analysis.
+At its core, STUMPY efficiently computes something called a
+[matrix profile](https://stumpy.readthedocs.io/en/latest/Tutorial_The_Matrix_Profile.html),
+which can be used for a wide variety of time series data mining tasks.
+
+## Visualization
+
+### [Altair](https://altair-viz.github.io/)
+
+Altair is a declarative statistical visualization library for Python.
+With Altair, you can spend more time understanding your data and its
+meaning. Altair's API is simple, friendly and consistent and built on
+top of the powerful Vega-Lite JSON specification. This elegant
+simplicity produces beautiful and effective visualizations with a
+minimal amount of code. Altair works with Pandas DataFrames.
+
+### [Bokeh](https://docs.bokeh.org)
+
+Bokeh is a Python interactive visualization library for large datasets
+that natively uses the latest web technologies. Its goal is to provide
+elegant, concise construction of novel graphics in the style of
+Protovis/D3, while delivering high-performance interactivity over large
+data to thin clients.
+
+[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a
+high-level API for Bokeh that can be loaded as a native pandas plotting
+backend via
+
+```
+pd.set_option("plotting.backend", "pandas_bokeh")
+```
+
+It is very similar to the matplotlib plotting backend, but provides
+interactive web-based charts and maps.
+
+### [pygwalker](https://github.com/Kanaries/pygwalker)
+
+PyGWalker is an interactive data visualization and
+exploratory data analysis tool built upon Graphic Walker
+with support for visualization, cleaning, and annotation workflows.
+
+pygwalker can save interactively created charts
+to Graphic-Walker and Vega-Lite JSON.
+
+```
+import pygwalker as pyg
+pyg.walk(df)
+```
+
+### [seaborn](https://seaborn.pydata.org)
+
+Seaborn is a Python visualization library based on
+[matplotlib](https://matplotlib.org). It provides a high-level,
+dataset-oriented interface for creating attractive statistical graphics.
+The plotting functions in seaborn understand pandas objects and leverage
+pandas grouping operations internally to support concise specification
+of complex visualizations. Seaborn also goes beyond matplotlib and
+pandas with the option to perform statistical estimation while plotting,
+aggregating across observations and visualizing the fit of statistical
+models to emphasize patterns in a dataset.
+
+```
+import seaborn as sns
+sns.set_theme()
+```
+
+### [plotnine](https://github.com/has2k1/plotnine/)
+
+Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a
+foundational exploratory visualization package for the R language. Based
+on "The Grammar of
+Graphics",
+it provides a powerful, declarative and extremely general way to
+generate bespoke plots of any kind of data.
+Various implementations in other languages are available.
+A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/).
+
+### [IPython Vega](https://github.com/vega/ipyvega)
+
+[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook.
+
+### [Plotly](https://plot.ly/python)
+
+[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/)
+enables interactive figures and web shareability. Maps, 2D, 3D, and
+live-streaming graphs are rendered with WebGL and
+[D3.js](https://d3js.org/). The library supports plotting directly from
+a pandas DataFrame and cloud-based collaboration. Users of matplotlib,
+ggplot for Python, and
+Seaborn can
+convert figures into interactive web-based plots. Plots can be drawn in
+[IPython Notebooks](https://plot.ly/ipython-notebooks/), edited with R
+or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly
+is free for unlimited sharing, and has cloud, offline, or on-premise
+accounts for private use.
+
+### [Lux](https://github.com/lux-org/lux)
+
+Lux is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. To use Lux, simply add an extra import alongside pandas:
+
+```python
+import lux
+import pandas as pd
+
+df = pd.read_csv("data.csv")
+df # discover interesting insights!
+```
+
+By printing out a dataframe, Lux automatically [recommends a set of visualizations](https://raw.githubusercontent.com/lux-org/lux-resources/master/readme_img/demohighlight.gif) that highlights interesting trends and patterns in the dataframe.
Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a [powerful, intuitive language](https://lux-api.readthedocs.io/en/latest/source/guide/vis.html) that allows users to create Altair, matplotlib, or Vega-Lite visualizations without having to think at the level of code.
+
+### [D-Tale](https://github.com/man-group/dtale)
+
+D-Tale is a lightweight web client for visualizing pandas data structures. It
+provides a rich spreadsheet-style grid which acts as a wrapper for a lot of
+pandas functionality (query, sort, describe, corr...) so users can quickly
+manipulate their data. There is also an interactive chart-builder using Plotly
+Dash allowing users to build nice portable visualizations. D-Tale can be
+invoked with the following command:
+
+```python
+import dtale
+
+dtale.show(df)
+```
+
+D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle
+& Google Colab. Here are some demos of the [grid](http://alphatechadmin.pythonanywhere.com/dtale/main/1).
+
+### [hvplot](https://hvplot.holoviz.org/index.html)
+
+hvPlot is a high-level plotting API for the PyData ecosystem built on [HoloViews](https://holoviews.org/).
+It can be loaded as a native pandas plotting backend via
+
+```python
+pd.set_option("plotting.backend", "hvplot")
+```
+
+## IDE
+
+### [IPython](https://ipython.org/documentation.html)
+
+IPython is an interactive command shell and distributed computing
+environment. IPython tab completion works with Pandas methods and also
+attributes like DataFrame columns.
+
+### [Jupyter Notebook / Jupyter Lab](https://jupyter.org)
+
+Jupyter Notebook is a web application for creating Jupyter notebooks. A
+Jupyter notebook is a JSON document containing an ordered list of
+input/output cells which can contain code, text, mathematics, plots and
+rich media. Jupyter notebooks can be converted to a number of open
+standard output formats (HTML, HTML presentation slides, LaTeX, PDF,
+ReStructuredText, Markdown, Python) through 'Download As' in the web
+interface and `jupyter nbconvert` in a shell.
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
+
+See [Options and Settings](https://pandas.pydata.org/docs/user_guide/options.html)
+for pandas `display.` settings.
+
+### [Spyder](https://www.spyder-ide.org/)
+
+Spyder is a cross-platform PyQt-based IDE combining the editing,
+analysis, debugging and profiling functionality of a software
+development tool with the data exploration, interactive execution, deep
+inspection and rich visualization capabilities of a scientific
+environment like MATLAB or RStudio.
+
+Its Variable
+Explorer allows
+users to view, manipulate and edit pandas `Index`, `Series`, and
+`DataFrame` objects like a "spreadsheet", including copying and
+modifying values, sorting, displaying a "heatmap", converting data
+types and more. Pandas objects can also be renamed, duplicated, new
+columns added, copied/pasted to/from the clipboard (as TSV), and
+saved/loaded to/from a file. Spyder can also import data from a variety
+of plain text and binary files or the clipboard into a new pandas
+DataFrame via a sophisticated import wizard.
+
+Most pandas classes, methods and data attributes can be autocompleted in
+Spyder's [Editor](https://docs.spyder-ide.org/current/panes/editor.html) and IPython
+Console, and Spyder's
+[Help pane](https://docs.spyder-ide.org/current/panes/help.html) can retrieve and
+render Numpydoc documentation on pandas objects in rich text with Sphinx
+both automatically and on-demand.
+
+### [marimo](https://marimo.io)
+
+marimo is a reactive notebook for Python and SQL that enhances productivity when working with dataframes. It provides several features to make data manipulation and visualization more interactive and fun:
+
+1. Rich, interactive displays: marimo can display pandas dataframes in interactive tables or charts with filtering and sorting capabilities.
+2. Data selection: Users can select data in tables or pandas-backed plots, and the selections are automatically sent to Python as pandas dataframes.
+3. No-code transformations: Users can interactively transform pandas dataframes using a GUI, without writing code. The generated code can be copied and pasted into the notebook.
+4. Custom filters: marimo allows the creation of pandas-backed filters using UI elements like sliders and dropdowns.
+5. Dataset explorer: marimo automatically discovers and displays all dataframes in the notebook, allowing users to explore and visualize data interactively.
+6. SQL integration: marimo allows users to write SQL queries against any pandas dataframes existing in memory.
+
+## API
+
+### [pandas-datareader](https://github.com/pydata/pandas-datareader)
+
+`pandas-datareader` is a remote data access library for pandas
+(PyPI:`pandas-datareader`). It is based on functionality that was
+located in `pandas.io.data` and `pandas.io.wb` but was split off in
+v0.19. See more in the pandas-datareader
+docs.
+
+The following data feeds are available:
+
+- Google Finance
+- Tiingo
+- Morningstar
+- IEX
+- Robinhood
+- Enigma
+- Quandl
+- FRED
+- Fama/French
+- World Bank
+- OECD
+- Eurostat
+- TSP Fund Data
+- Nasdaq Trader Symbol Definitions
+- Stooq Index Data
+- MOEX Data
+
+### [pandaSDMX](https://pandasdmx.readthedocs.io)
+
+pandaSDMX is a library to retrieve and acquire statistical data and
+metadata disseminated in [SDMX](https://sdmx.org) 2.1, an
+ISO-standard widely used by institutions such as statistics offices,
+central banks, and international organisations. pandaSDMX can expose
+datasets and related structural metadata including data flows,
+code-lists, and data structure definitions as pandas Series or
+MultiIndexed DataFrames.
+
+### [fredapi](https://github.com/mortada/fredapi)
+
+fredapi is a Python interface to the Federal Reserve Economic Data
+(FRED) provided by the Federal Reserve
+Bank of St. Louis. It works with both the FRED database and the ALFRED
+database, which contains point-in-time data (i.e. historic data
+revisions). fredapi provides a wrapper in Python to the FRED HTTP API,
+and also provides several convenient methods for parsing and analyzing
+point-in-time data from ALFRED. fredapi makes use of pandas and returns
+data in a Series or DataFrame. This module requires a FRED API key that
+you can obtain for free on the FRED website.
+
+## Domain specific
+
+### [Geopandas](https://github.com/geopandas/geopandas)
+
+Geopandas extends pandas data objects to include geographic information
+which supports geometric operations. If your work entails maps and
+geographical coordinates, and you love pandas, you should take a close
+look at Geopandas.
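+
+As a quick, hedged illustration of the geographic layer Geopandas adds (the
+cities, coordinates, and column names below are invented for the example):
+
+```python
+import geopandas as gpd
+from shapely.geometry import Point
+
+# A GeoDataFrame is a pandas DataFrame with a geometry column attached.
+gdf = gpd.GeoDataFrame(
+    {"city": ["Paris", "Berlin"]},
+    geometry=[Point(2.35, 48.85), Point(13.40, 52.52)],
+    crs="EPSG:4326",  # WGS84 longitude/latitude
+)
+
+# Ordinary pandas operations still apply; spatial ones come from Geopandas.
+print(gdf[gdf["city"] == "Paris"])
+print(gdf.to_crs("EPSG:3857").geometry.x)  # reproject, then read x coordinates
+```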
+
+### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)
+
+gurobipy-pandas provides a convenient accessor API to connect pandas with
+gurobipy. It enables users to more easily and efficiently build mathematical
+optimization models from data stored in DataFrames and Series, and to read
+solutions back directly as pandas objects.
+
+### [staircase](https://github.com/staircase-dev/staircase)
+
+staircase is a data analysis package, built upon pandas and numpy, for modelling and
+manipulation of mathematical step functions. It provides a rich variety of arithmetic
+operations, relational operations, logical operations, statistical operations and
+aggregations for step functions defined over real numbers, datetime and timedelta domains.
+
+### [xarray](https://github.com/pydata/xarray)
+
+xarray brings the labeled data power of pandas to the physical sciences
+by providing N-dimensional variants of the core pandas data structures.
+It aims to provide a pandas-like and pandas-compatible toolkit for
+analytics on multi-dimensional arrays, rather than the tabular data for
+which pandas excels.
+
+## IO
+
+### [NTV-pandas](https://github.com/loco-philippe/ntv-pandas)
+
+NTV-pandas provides a JSON converter with more data types than the ones supported by pandas directly.
+
+It supports the following data types:
+
+- pandas data types
+- data types defined in the [NTV format](https://loco-philippe.github.io/ES/JSON%20semantic%20format%20\(JSON-NTV\).htm)
+- data types defined in [Table Schema specification](https://datapackage.org/standard/table-schema/)
+
+The interface is always reversible (conversion round trip) with two formats (JSON-NTV and JSON-TableSchema).
+
+Example:
+
+```python
+import ntv_pandas as npd
+
+jsn = df.npd.to_json(table=False) # save df as a JSON-value (Table Schema format if table is True, else NTV format)
+df = npd.read_json(jsn) # load a JSON-value as a `DataFrame`
+
+df.equals(npd.read_json(df.npd.to_json(df))) # `True` in any case, whether `table=True` or not
+```
+
+### [BCPandas](https://github.com/yehoshuadimarsky/bcpandas)
+
+BCPandas provides high performance writes from pandas to Microsoft SQL Server,
+far exceeding the performance of the native `df.to_sql` method. Internally, it uses
+Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
+Rigorously tested, it is a complete replacement for `df.to_sql`.
+
+### [Deltalake](https://pypi.org/project/deltalake)
+
+The Deltalake Python package lets you access tables stored in
+[Delta Lake](https://delta.io/) natively in Python without the need to use Spark or
+JVM. It provides the `delta_table.to_pyarrow_table().to_pandas()` method to convert
+any Delta table into a pandas DataFrame.
+
+### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas)
+
+pandas-gbq provides high performance reads and writes to and from
+[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0),
+these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`.
+Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead.
+
+### [ArcticDB](https://github.com/man-group/ArcticDB)
+
+ArcticDB is a serverless DataFrame database engine designed for the Python Data Science ecosystem. ArcticDB enables you to store, retrieve, and process pandas DataFrames at scale. It is a storage engine designed for object storage and also supports local-disk storage using LMDB.
ArcticDB requires zero additional infrastructure beyond a running Python environment and access to object storage and can be installed in seconds. Please find full documentation [here](https://docs.arcticdb.io/latest/).
+
+#### ArcticDB Terminology
+
+ArcticDB is structured to provide a scalable and efficient way to manage and retrieve DataFrames, organized into several key components:
+
+- `Object Store` Collections of libraries. Used to separate logical environments from each other. Analogous to a database server.
+- `Library` Contains multiple symbols which are grouped in a certain way (different users, markets, etc). Analogous to a database.
+- `Symbol` Atomic unit of data storage. Identified by a string name. Data stored under a symbol strongly resembles a pandas DataFrame. Analogous to tables.
+- `Version` Every modifying action (write, append, update) performed on a symbol creates a new version of that object.
+
+#### Installation
+
+To install, simply run:
+
+```console
+pip install arcticdb
+```
+
+To get started, we can import ArcticDB and instantiate it:
+
+```python
+import arcticdb as adb
+import numpy as np
+import pandas as pd
+# this will set up the storage using the local file system
+arctic = adb.Arctic("lmdb://arcticdb_test")
+```
+
+> **Note:** ArcticDB supports any S3 API compatible storage, including AWS. ArcticDB also supports Azure Blob storage.\
+> ArcticDB also supports LMDB for local/file based storage - to use LMDB, pass an LMDB path as the URI: `adb.Arctic('lmdb://path/to/desired/database')`.
+
+#### Library Setup
+
+ArcticDB is geared towards storing many (potentially millions of) tables. Individual tables (DataFrames) are called symbols and are stored in collections called libraries. A single library can store many symbols. Libraries must first be initialized prior to use:
+
+```python
+lib = arctic.get_library('sample', create_if_missing=True)
+```
+
+#### Writing Data to ArcticDB
+
+Now that we have a library set up, we can get to reading and writing data. ArcticDB has a set of simple functions for DataFrame storage. Let's write a DataFrame to storage.
+
+```python
+df = pd.DataFrame(
+    {
+        "a": list("abc"),
+        "b": list(range(1, 4)),
+        "c": np.arange(3, 6).astype("u1"),
+        "d": np.arange(4.0, 7.0, dtype="float64"),
+        "e": [True, False, True],
+        "f": pd.date_range("20130101", periods=3)
+    }
+)
+
+df
+df.dtypes
+```
+
+Write to ArcticDB.
+
+```python
+write_record = lib.write("test", df)
+```
+
+> **Note:** When writing pandas DataFrames, ArcticDB supports the following index types:
+>
+> - `pandas.Index` containing int64 (or the corresponding dedicated types Int64Index, UInt64Index)
+> - `RangeIndex`
+> - `DatetimeIndex`
+> - `MultiIndex` composed of above supported types
+>
+> The "row" concept in `head`/`tail` refers to the row number ('iloc'), not the value in the `pandas.Index` ('loc').
+
+#### Reading Data from ArcticDB
+
+Read the data back from storage:
+
+```python
+read_record = lib.read("test")
+read_record.data
+df.dtypes
+```
+
+ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder).
+
+### [Hugging Face](https://huggingface.co/datasets)
+
+The Hugging Face Dataset Hub provides a large collection of ready-to-use datasets for machine learning shared by the community.
The platform offers a user-friendly interface to explore, discover and visualize datasets, and provides tools to easily load and work with these datasets in Python thanks to the [huggingface_hub](https://github.com/huggingface/huggingface_hub) library.
+
+You can access datasets on Hugging Face using `hf://` paths in pandas, in the form `hf://datasets/username/dataset_name/...`.
+
+For example, here is how to load the [stanfordnlp/imdb dataset](https://huggingface.co/datasets/stanfordnlp/imdb):
+
+```python
+import pandas as pd
+
+# Load the IMDB dataset
+df = pd.read_parquet("hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet")
+```
+
+Tip: on a dataset page, click on "Use this dataset" to get the code to load it in pandas.
+
+To save a dataset on Hugging Face you need to [create a public or private dataset](https://huggingface.co/new-dataset) and [login](https://huggingface.co/docs/huggingface_hub/quick-start#login-command), and then you can use `df.to_csv/to_json/to_parquet`:
+
+```python
+# Save the dataset to my Hugging Face account
+df.to_parquet("hf://datasets/username/dataset_name/train.parquet")
+```
+
+You can find more information about the Hugging Face Dataset Hub in the [documentation](https://huggingface.co/docs/hub/en/datasets).
+
+## Out-of-core
+
+### [Bodo](https://github.com/bodo-ai/Bodo)
+
+Bodo is a high-performance compute engine for Python data processing.
+Using an auto-parallelizing just-in-time (JIT) compiler, Bodo simplifies scaling Pandas
+workloads from laptops to clusters without major code changes.
+Under the hood, Bodo relies on MPI-based high-performance computing (HPC) technology, making it
+both easier to use and often much faster than alternatives.
+Bodo also provides a SQL engine that can query distributed pandas dataframes efficiently.
+
+```python
+import pandas as pd
+import bodo
+
+@bodo.jit
+def process_data():
+    df = pd.read_parquet("my_data.pq")
+    df2 = pd.DataFrame({"A": df.apply(lambda r: 0 if r.A == 0 else (r.B // r.A), axis=1)})
+    df2.to_parquet("out.pq")
+
+process_data()
+```
+
+### [Cylon](https://cylondata.org/)
+
+Cylon is a fast, scalable, distributed-memory parallel runtime with a
+pandas-like Python DataFrame API. "Core Cylon" is implemented in C++ using the Apache
+Arrow format to represent the data in memory. The Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+```python
+from pycylon import read_csv, DataFrame, CylonEnv
+from pycylon.net import MPIConfig
+
+# Initialize Cylon distributed environment
+config: MPIConfig = MPIConfig()
+env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+df1: DataFrame = read_csv('/tmp/csv1.csv')
+df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+# Using 1000s of cores across the cluster to compute the join
+df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+print(df3)
+```
+
+### [Dask](https://docs.dask.org)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
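+
+As a small, hedged sketch of the drop-in style of the Dask DataFrame API (the
+file pattern and column names are invented for the example):
+
+```python
+import dask.dataframe as dd
+
+# Lazily read a directory of CSVs as one logical, partitioned DataFrame.
+ddf = dd.read_csv("data/*.csv")
+
+# Familiar pandas-style calls build a task graph; nothing executes until
+# .compute(), which returns an ordinary pandas object.
+result = ddf.groupby("key")["value"].mean().compute()
+print(result.head())
+```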
+
+### [Dask-ML](https://ml.dask.org)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Ibis](https://ibis-project.org/docs/)
+
+Ibis offers a standard way to write analytics code that can run in multiple engines. It helps bridge the gap between local Python environments (like pandas) and remote storage and execution systems such as Hadoop components (HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.).
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multiple cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
+
+### [Modin](https://github.com/modin-project/modin)
+
+The `modin.pandas` DataFrame is a parallel and distributed drop-in replacement
+for pandas. This means that you can use Modin with existing pandas code or write
+new code with the existing pandas API. Modin can leverage your entire machine or
+cluster to speed up and scale your pandas workloads, including traditionally
+time-consuming tasks like ingesting data (`read_csv`, `read_excel`,
+`read_parquet`, etc.).
+
+```python
+# import pandas as pd
+import modin.pandas as pd
+
+df = pd.read_csv("big.csv") # use all your cores!
+```
+
+### [Pandarallel](https://github.com/nalepae/pandarallel)
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+```python
+from pandarallel import pandarallel
+
+pandarallel.initialize(progress_bar=True)
+
+# df.apply(func)
+df.parallel_apply(func)
+```
+
+### [Vaex](https://vaex.io/docs/)
+
+Vaex is a Python library for out-of-core DataFrames (similar to pandas), to
+visualize and explore big tabular datasets. It can calculate statistics
+such as mean, sum, count, standard deviation, etc., on an N-dimensional
+grid up to a billion (10^9) objects/rows per second. Visualization is
+done using histograms, density plots and 3d volume rendering, allowing
+interactive exploration of big data. Vaex uses memory mapping, a
+zero-memory-copy policy and lazy computations for best performance (no memory
+wasted).
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
+
+### [Hail Query](https://hail.is/)
+
+An out-of-core, preemptible-safe, distributed, dataframe library serving
+the genetics community. Hail Query ships with on-disk data formats,
+in-memory data formats, an expression compiler, a query planner, and a
+distributed sort algorithm all designed to accelerate queries on large
+matrices of genome sequencing data.
+
+It is often easiest to use pandas to manipulate the summary statistics or
+other small aggregates produced by Hail. For this reason, Hail provides
+native import to and export from pandas DataFrames:
+
+- [`Table.from_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.from_pandas)
+- [`Table.to_pandas`](https://hail.is/docs/latest/hail.Table.html#hail.Table.to_pandas)
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/pyjanitor-devs/pyjanitor)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
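+
+As a hedged illustration of that method-chaining style (the toy data and the
+particular cleaning steps are invented; see pyjanitor's docs for its full set
+of verbs):
+
+```python
+import pandas as pd
+import janitor  # noqa: F401 -- importing registers janitor's DataFrame methods
+
+df = pd.DataFrame({"Raw Name ": ["a", "b"], "Score": [1.0, None]})
+
+# Each janitor verb returns a DataFrame, so cleaning steps chain naturally,
+# and plain pandas methods mix into the same chain.
+cleaned = (
+    df.clean_names()  # "Raw Name " -> "raw_name", "Score" -> "score"
+    .dropna(subset=["score"])
+    .rename_column("raw_name", "name")
+)
+print(cleaned)
+```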
+
+### [Pandera](https://pandera.readthedocs.io/en/stable/)
+
+Pandera provides a flexible and expressive API for performing data validation on dataframes
+to make data processing pipelines more readable and robust.
+Dataframes contain information that pandera explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings.
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/docs/development/extending.html#extension-types) to extend NumPy's type system.
+The following libraries implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [awkward-pandas](https://github.com/scikit-hep/awkward)
+
+Awkward-pandas provides an extension type for storing Awkward
+Arrays inside pandas' Series and
+DataFrame. It also provides an accessor for using awkward functions
+on Series that are of awkward type.
+
+### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
+
+db-dtypes provides extension types for working with types like
+DATE, TIME, and JSON from database systems. This package is used
+by pandas-gbq to provide natural dtypes for BigQuery data types without
+a natural numpy type.
+
+### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
+
+Pandas-Genomics provides an extension type and extension array for working
+with genomics data. It also includes `genomics` accessors for many useful properties
+and methods related to QC and analysis of genomics data.
+
+### [Physipandas](https://github.com/mocquin/physipandas)
+
+Physipandas provides an extension for manipulating physical quantities
+(like scalar and numpy.ndarray) in association with a physical unit
+(like meter or joule) and additional features for integration of
+`physipy` accessors with pandas Series and DataFrame.
+
+### [Pint-Pandas](https://github.com/hgrecco/pint-pandas)
+
+Pint-Pandas provides an extension type for storing numeric arrays with units.
+These arrays can be stored inside pandas' Series and DataFrame. Operations
+between Series and DataFrame columns which use pint's extension array are then
+units aware.
+
+### [Text Extensions](https://ibm.biz/text-extensions-for-pandas)
+
+Text Extensions for Pandas provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into pandas DataFrames.
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace.
+
+| Library                                                              | Accessor   | Classes               |
+| -------------------------------------------------------------------- | ---------- | --------------------- |
+| [awkward-pandas](https://awkward-pandas.readthedocs.io/en/latest/)   | `ak`       | `Series`              |
+| [pdvega](https://altair-viz.github.io/pdvega/)                       | `vgplot`   | `Series`, `DataFrame` |
+| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` |
+| [pint-pandas](https://github.com/hgrecco/pint-pandas)                | `pint`     | `Series`, `DataFrame` |
+| [physipandas](https://github.com/mocquin/physipandas)                | `physipy`  | `Series`, `DataFrame` |
+| [composeml](https://github.com/alteryx/compose)                      | `slice`    | `DataFrame`           |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)         | `gppd`     | `Series`, `DataFrame` |
+| [staircase](https://www.staircase.dev/)                              | `sc`       | `Series`, `DataFrame` |
+| [woodwork](https://github.com/alteryx/woodwork)                      | `ww`       | `Series`, `DataFrame` |
+
+## Development tools
+
+### [pandas-stubs](https://github.com/VirtusLab/pandas-stubs)
+
+While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of the pandas API.
+
+Learn more by reading through issues [14468](https://github.com/pandas-dev/pandas/issues/14468),
+[26766](https://github.com/pandas-dev/pandas/issues/26766), and [28142](https://github.com/pandas-dev/pandas/issues/28142).
+
+See installation and usage instructions on the [GitHub page](https://github.com/VirtusLab/pandas-stubs).
+
+### [Hamilton](https://github.com/dagworks-inc/hamilton)
+
+Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help you manage a
+pandas code base, specifically with respect to feature engineering for machine learning models.
+
+It prescribes an opinionated paradigm that ensures all code is:
+
+- unit testable
+- integration testing friendly
+- documentation friendly
+- reusable: transformation logic is decoupled from the context in which it is used
+- integrable with runtime data quality checks
+
+This helps you scale your pandas code base while keeping maintenance costs low.
+
+For more information, see [documentation](https://hamilton.readthedocs.io/).

From 0fc1b5c805ea0489817107605e485b4cce7ec687 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 13 May 2025 13:09:11 -0500
Subject: [PATCH 124/184] New translations citing.md (French)

---
 web/pandas/fr/about/citing.md | 133 ++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 web/pandas/fr/about/citing.md

diff --git a/web/pandas/fr/about/citing.md b/web/pandas/fr/about/citing.md
new file mode 100644
index 000000000..7192446c6
--- /dev/null
+++ b/web/pandas/fr/about/citing.md
@@ -0,0 +1,133 @@
+# Citing and logo
+
+## Citing pandas
+
+If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the
+following paper:
+
+- pandas on Zenodo,
+  Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author
+  list from there with "The pandas development team" like in the example below.
+ + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 36fd447b83c4acc03e67593b7b580ffd967e4363 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:12 -0500 Subject: [PATCH 125/184] New translations citing.md (Arabic) --- web/pandas/ar/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/ar/about/citing.md diff --git a/web/pandas/ar/about/citing.md b/web/pandas/ar/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/ar/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 8ff7b6dd141fa1b6d34d44a9045956cf334ff0d2 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:14 -0500 Subject: [PATCH 126/184] New translations citing.md (Catalan) --- web/pandas/ca/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/ca/about/citing.md diff --git a/web/pandas/ca/about/citing.md b/web/pandas/ca/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/ca/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 1ca4d1ae2bf6f76ac94f1cba24b9b83279710e32 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:15 -0500 Subject: [PATCH 127/184] New translations citing.md (Japanese) --- web/pandas/ja/about/citing.md | 143 ++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 web/pandas/ja/about/citing.md diff --git a/web/pandas/ja/about/citing.md b/web/pandas/ja/about/citing.md new file mode 100644 index 000000000..4d40c241d --- /dev/null +++ b/web/pandas/ja/about/citing.md @@ -0,0 +1,143 @@ +# Citing and logo + +## pandas を引用することについて + +もしあなたが学術論文で _pandas_ を使用する場合は、以下のソフトウェアおよび論文への引用をお願いいたします: + +- Zenodo における pandas, + Zenodo で私たちを見つけて、使用しているバージョンの引用に置き換えてください。 以下の例のように、作者の + リストを"パンダ開発チーム"に置き換えることができます。 + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf),\ + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## ブランドとロゴ + +プロジェクト名 _pandas_ を使用する際は、文頭であっても小文字で記載してください。 + +_pandas_ の公式ロゴは次の通りです: + +### 主要ロゴ + + + + + + + + +### セカンダリーロゴ + + + + + + + + +### ロゴマーク + + + + + + + + +### ロゴの使用方法 + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +ロゴを使用する際は、以下の指示に従ってください: + +- プライマリロゴは印刷時は1インチ未満、ウェブ時は72px未満のサイズで使用しないでください。 +- セカンダリーロゴは印刷時は0.75インチ未満、ウェブ時は55px未満のサイズで使用しないでください。 +- ロゴの周囲に十分な余白を残してください(ロゴの高さ分の余白を上部、下部、左右に設ける)。 +- ロゴの比率を変更して歪めないでください。 +- ロゴの上にテキストやその他の要素を配置しないでください。 + +### カラー + + + + + + + + + + +
+ + +
+ ブルー
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ イエロー
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ ピンク
+ RGB: R231 G4 B136
+ HEX: #E70488 +
+ + + Zenodo 上の pandas, +Zenodo で _pandas_ を見つけ、ご利用中のバージョンに対応する引用文に置き換えてください。以下の例のように、完全な著者リストを "The pandas development team" に置き換えることが可能です。 + + + _pandas_ のロゴは、フルカラーとホワイトアクセントの両方で利用可能です。 +フルカラーのロゴは白い背景でのみ使用してください。 +ホワイトアクセントのロゴは対照的な色の背景で使用してください。
From 9262a376a5b8aeeb3904ef3fbd9cd855ed30048b Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:16 -0500 Subject: [PATCH 128/184] New translations citing.md (Korean) --- web/pandas/ko/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/ko/about/citing.md diff --git a/web/pandas/ko/about/citing.md b/web/pandas/ko/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/ko/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 5e07ec14ebfd3a028e0fd2e31319a1b52eddc872 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:17 -0500 Subject: [PATCH 129/184] New translations citing.md (Polish) --- web/pandas/pl/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/pl/about/citing.md diff --git a/web/pandas/pl/about/citing.md b/web/pandas/pl/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/pl/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 02bbc1f21054e3d5df021effbee9459d598811dd Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:18 -0500 Subject: [PATCH 130/184] New translations citing.md (Russian) --- web/pandas/ru/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/ru/about/citing.md diff --git a/web/pandas/ru/about/citing.md b/web/pandas/ru/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/ru/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 0052f2e566258446b5316e64910618a51b088f47 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:20 -0500 Subject: [PATCH 131/184] New translations citing.md (Chinese Simplified) --- web/pandas/zh/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/zh/about/citing.md diff --git a/web/pandas/zh/about/citing.md b/web/pandas/zh/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/zh/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 8da52744d80b8e8314151eacf71aa88ed17e42c4 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:22 -0500 Subject: [PATCH 132/184] New translations citing.md (Persian) --- web/pandas/fa/about/citing.md | 131 ++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 web/pandas/fa/about/citing.md diff --git a/web/pandas/fa/about/citing.md b/web/pandas/fa/about/citing.md new file mode 100644 index 000000000..b4ef4d14e --- /dev/null +++ b/web/pandas/fa/about/citing.md @@ -0,0 +1,131 @@ +# استناد و نشان + +## استناد به پانداس + +اگر از کتابخانه پانداس (pandas) در یک انتشار علمی استفاده می‌کنید، قدردان خواهیم بود که نرم‌افزار منتشرشده و مقاله زیر را مورد استناد قرار دهید: + +- پانداس در Zenodo, + لطفاً ما را در Zenodo پیدا کنید و استناد مربوط به نسخه‌ای که استفاده می‌کنید را جایگزین کنید. می‌توانید فهرست کامل نویسندگان را از آنجا با عبارت «The pandas development team» همانند نمونه زیر جایگزین کنید. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: پانداس}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [ساختارهای داده برای رایانش آماری در پایتون](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + مک‌کینی، مجموعه مقالات نهمین کنفرانس پایتون در علوم، جلد ۴۴۵، سال ۲۰۱۰. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## نشان و نماد تجاری + +هنگام استفاده از نام پروژه _پانداس_، لطفاً همیشه آن را با حروف کوچک بنویسید، حتی در ابتدای جمله. + +لوگوهای رسمی _پانداس_ عبارتند از: + +### نشان اصلی + + + + + + + + +### نشان فرعی + + + + + + + + +### نشان‌واره + + + + + + + + +### کاربرد نشان + +نشان (لوگوی) پانداس در دو گونهٔ رنگیِ کامل و تک‌رنگِ سفید در دسترس است. +نشانِ رنگیِ کامل تنها باید بر زمینه‌های سفید نمایش داده شود. +نشانِ سفید باید بر زمینه‌ای با رنگ متضاد قرار گیرد. + +هنگام بهره‌گیری از نشان، لطفاً دستورالعمل‌های زیر را رعایت کنید: + +- نشان اصلی نباید هرگز در اندازه‌ای کوچکتر از ۱ اینچ برای چاپ و ۷۲ پیکسل برای وب نمایش داده شود +- نشانۀ فرعی نباید هرگز در اندازه‌ای کوچکتر از ۰٫۷۵ اینچ برای چاپ و ۵۵ پیکسل برای وب نمایش داده شود +- دور تا دور نشان باید حاشیه‌ی کافی گذاشته شود (به‌اندازه‌ی بلندیِ خودِ نشان در بالا، پایین و هر دو سوی آن) +- از تغییر نسبت‌های نشان و تحریف آن خودداری کنید +- از قرار دادن متن یا عناصر دیگر روی نشان خودداری کنید + +### رنگ‌ها + + + + + + + +
+ + + +
+ آبی
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ زرد
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ صورتی
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 83ac9576506a3828e95de65889ca842726891b8b Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:23 -0500 Subject: [PATCH 133/184] New translations citing.md (Tamil) --- web/pandas/ta/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/ta/about/citing.md diff --git a/web/pandas/ta/about/citing.md b/web/pandas/ta/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/ta/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
From 14164652c481d5de0e0e4881018bf459c56ddbcc Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:24 -0500 Subject: [PATCH 134/184] New translations citing.md (Hindi) --- web/pandas/hi/about/citing.md | 134 ++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 web/pandas/hi/about/citing.md diff --git a/web/pandas/hi/about/citing.md b/web/pandas/hi/about/citing.md new file mode 100644 index 000000000..0619c5425 --- /dev/null +++ b/web/pandas/hi/about/citing.md @@ -0,0 +1,134 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the +following paper: + +- pandas on Zenodo, + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` + +- [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. + +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
+ + + + +
+ Blue
+ RGB: R21 G4 B88
+ HEX: #150458 +
+ + + + +
+ Yellow
+ RGB: R255 G202 B0
+ HEX: #FFCA00 +
+ + + + +
+ Pink
+ RGB: R231 G4 B136
+ HEX: #E70488 +
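
Each citing.md added above instructs readers to look pandas up on Zenodo and cite the record for the version they actually use. As a rough sketch of automating that lookup, assuming Zenodo's public REST API serves records at `/api/records/<id>` and that `3509134` (the suffix of the DOI in the BibTeX entries above) identifies the concept record, one could do:

```python
# Rough sketch under the assumptions stated above: fetch the pandas
# record from Zenodo and print its version-specific citation fields.
import requests

resp = requests.get("https://zenodo.org/api/records/3509134", timeout=30)
resp.raise_for_status()
metadata = resp.json()["metadata"]

# The version-specific DOI is the one the citing pages ask readers to use;
# the "version" and "doi" field names are assumptions about Zenodo's schema.
print(metadata.get("version"), metadata.get("doi"))
```
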
From f5590441783d84dad5339004602d149356c2ec3c Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:25 -0500 Subject: [PATCH 135/184] New translations try.md (French) --- web/pandas/fr/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/fr/try.md diff --git a/web/pandas/fr/try.md b/web/pandas/fr/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/fr/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). + +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From cd64ff6ba7c612151ccd77a5ce99af2677eb7bd4 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:27 -0500 Subject: [PATCH 136/184] New translations try.md (Arabic) --- web/pandas/ar/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/ar/try.md diff --git a/web/pandas/ar/try.md b/web/pandas/ar/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/ar/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). + +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From 923485d4c1977552b2f4169ae0467e1d84e32808 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:28 -0500 Subject: [PATCH 137/184] New translations try.md (Catalan) --- web/pandas/ca/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/ca/try.md diff --git a/web/pandas/ca/try.md b/web/pandas/ca/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/ca/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). 
+ +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From 7ec1fa0e27b7e01a02509683c605c4c8bf2ca2a7 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:29 -0500 Subject: [PATCH 138/184] New translations try.md (Japanese) --- web/pandas/ja/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/ja/try.md diff --git a/web/pandas/ja/try.md b/web/pandas/ja/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/ja/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). + +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From 45aaca5009509b7649454c910eb0c1650bcb4530 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:30 -0500 Subject: [PATCH 139/184] New translations try.md (Korean) --- web/pandas/ko/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/ko/try.md diff --git a/web/pandas/ko/try.md b/web/pandas/ko/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/ko/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). + +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From f77743200780c07ea9d5406a8745a4d9fb292fd9 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:31 -0500 Subject: [PATCH 140/184] New translations try.md (Polish) --- web/pandas/pl/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/pl/try.md diff --git a/web/pandas/pl/try.md b/web/pandas/pl/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/pl/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). 
+ +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From ba8bc0393d3b1356049e853d1289692ddf634118 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:32 -0500 Subject: [PATCH 141/184] New translations try.md (Russian) --- web/pandas/ru/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/ru/try.md diff --git a/web/pandas/ru/try.md b/web/pandas/ru/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/ru/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). + +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From f02c6d95700ece3dbb5b633b8b93a1a61a0e8275 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:33 -0500 Subject: [PATCH 142/184] New translations try.md (Chinese Simplified) --- web/pandas/zh/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/zh/try.md diff --git a/web/pandas/zh/try.md b/web/pandas/zh/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/zh/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). + +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From 2d38bd1dc5ae7a6a57cddf72ab6f903a70948b92 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:35 -0500 Subject: [PATCH 143/184] New translations try.md (Persian) --- web/pandas/fa/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/fa/try.md diff --git a/web/pandas/fa/try.md b/web/pandas/fa/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/fa/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). 
+ +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From 6849ba8b64f39c87bb3d56d15a37c97740aaf904 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:36 -0500 Subject: [PATCH 144/184] New translations try.md (Tamil) --- web/pandas/ta/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/ta/try.md diff --git a/web/pandas/ta/try.md b/web/pandas/ta/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/ta/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). + +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From 9755f4808770a2523c57498f0e12bd002553eabd Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:09:37 -0500 Subject: [PATCH 145/184] New translations try.md (Hindi) --- web/pandas/hi/try.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 web/pandas/hi/try.md diff --git a/web/pandas/hi/try.md b/web/pandas/hi/try.md new file mode 100644 index 000000000..ee2f98b05 --- /dev/null +++ b/web/pandas/hi/try.md @@ -0,0 +1,12 @@ +# Try pandas in your browser (experimental) + +Try our experimental [JupyterLite](https://jupyterlite.readthedocs.io/en/stable/) live shell with `pandas`, powered by [Pyodide](https://pyodide.org/en/stable/). 
+ +**Please note it can take a while (>30 seconds) before the shell is initialized and ready to run commands.** + +**Running it requires a reasonable amount of bandwidth and resources (>70 MiB on the first load), so it may not work properly on all devices or networks.** + + From c5c7d9e52e1dd02f31c0c7dcc4f9a63afc33a323 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:14 -0500 Subject: [PATCH 146/184] New translations navbar.yml (French) --- web/pandas/fr/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/fr/navbar.yml diff --git a/web/pandas/fr/navbar.yml b/web/pandas/fr/navbar.yml new file mode 100644 index 000000000..bee6a8430 --- /dev/null +++ b/web/pandas/fr/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "A propos de pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Prise en main" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Communauté" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Écosystème" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From fb3e939b9f491f2711fd7e0b3790b4c7ae28311b Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:15 -0500 Subject: [PATCH 147/184] New translations navbar.yml (Spanish) --- web/pandas/es/navbar.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/pandas/es/navbar.yml b/web/pandas/es/navbar.yml index b4c302d7d..ddb621deb 100644 --- a/web/pandas/es/navbar.yml +++ b/web/pandas/es/navbar.yml @@ -30,4 +30,4 @@ navbar: - name: "Pruebas de rendimiento" target: community/benchmarks.html - name: "Contribuir" - target: contribute.html \ No newline at end of file + target: contribute.html From 8b7d6c4e32d6aac5b8c1512e747ab4918977e9d8 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:16 -0500 Subject: [PATCH 148/184] New translations navbar.yml (Arabic) --- web/pandas/ar/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/ar/navbar.yml diff --git a/web/pandas/ar/navbar.yml b/web/pandas/ar/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/ar/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: 
community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From 496859a6c783e6d2664890f3a9550c12acc1ac57 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:17 -0500 Subject: [PATCH 149/184] New translations navbar.yml (Catalan) --- web/pandas/ca/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/ca/navbar.yml diff --git a/web/pandas/ca/navbar.yml b/web/pandas/ca/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/ca/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From 35abd626ec0c86f4026b4a2f48554e002b9dbf0b Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:18 -0500 Subject: [PATCH 150/184] New translations navbar.yml (Japanese) --- web/pandas/ja/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/ja/navbar.yml diff --git a/web/pandas/ja/navbar.yml b/web/pandas/ja/navbar.yml new file mode 100644 index 000000000..180cdf9ea --- /dev/null +++ b/web/pandas/ja/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "はじめに" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From 4ff7d837cc51d44abe1cc1c1e50608625319f6dd Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:19 -0500 Subject: [PATCH 151/184] New translations navbar.yml (Korean) --- web/pandas/ko/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/ko/navbar.yml diff --git a/web/pandas/ko/navbar.yml b/web/pandas/ko/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/ko/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: 
"Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From cf0eb01541d7830323e13c91ece8ed851d3970b6 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:20 -0500 Subject: [PATCH 152/184] New translations navbar.yml (Polish) --- web/pandas/pl/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/pl/navbar.yml diff --git a/web/pandas/pl/navbar.yml b/web/pandas/pl/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/pl/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From db0f2fdb2261c08bbcfa5dc7be957ed0869ba060 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:21 -0500 Subject: [PATCH 153/184] New translations navbar.yml (Russian) --- web/pandas/ru/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/ru/navbar.yml diff --git a/web/pandas/ru/navbar.yml b/web/pandas/ru/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/ru/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From cb66c5053283226705b3a4995c316e16c6abc826 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations 
Date: Tue, 13 May 2025 13:16:22 -0500 Subject: [PATCH 154/184] New translations navbar.yml (Chinese Simplified) --- web/pandas/zh/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/zh/navbar.yml diff --git a/web/pandas/zh/navbar.yml b/web/pandas/zh/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/zh/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From 432968d41b0d164da50f958b8a6c595a95b22ba8 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:24 -0500 Subject: [PATCH 155/184] New translations navbar.yml (Persian) --- web/pandas/fa/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/fa/navbar.yml diff --git a/web/pandas/fa/navbar.yml b/web/pandas/fa/navbar.yml new file mode 100644 index 000000000..cd7c3d762 --- /dev/null +++ b/web/pandas/fa/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "دستور کار (حاکمیت)" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "استناد و نشان" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From 9dbefbdac0f64d0765317e68cfc241f1f4814171 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:25 -0500 Subject: [PATCH 156/184] New translations navbar.yml (Tamil) --- web/pandas/ta/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/ta/navbar.yml diff --git a/web/pandas/ta/navbar.yml b/web/pandas/ta/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/ta/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting 
started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From 06b59e7b74ef47e376b2c992e8f26fb93da45f5d Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 13 May 2025 13:16:26 -0500 Subject: [PATCH 157/184] New translations navbar.yml (Hindi) --- web/pandas/hi/navbar.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 web/pandas/hi/navbar.yml diff --git a/web/pandas/hi/navbar.yml b/web/pandas/hi/navbar.yml new file mode 100644 index 000000000..bcc2d062f --- /dev/null +++ b/web/pandas/hi/navbar.yml @@ -0,0 +1,33 @@ +navbar: + - name: "About us" + target: + - name: "About pandas" + target: about/ + - name: "Project roadmap" + target: about/roadmap.html + - name: "Governance" + target: about/governance.html + - name: "Team" + target: about/team.html + - name: "Sponsors" + target: about/sponsors.html + - name: "Citing and logo" + target: about/citing.html + - name: "Getting started" + target: getting_started.html + - name: "Documentation" + target: docs/ + - name: "Community" + target: + - name: "Blog" + target: community/blog/ + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Code of conduct" + target: community/coc.html + - name: "Ecosystem" + target: community/ecosystem.html + - name: "Benchmarks" + target: community/benchmarks.html + - name: "Contribute" + target: contribute.html From 1ff895ab164575db946e1000ba54851d2a8d8d93 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:22 -0500 Subject: [PATCH 158/184] New translations navbar.yml (French) --- web/pandas/fr/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/fr/navbar.yml b/web/pandas/fr/navbar.yml index bee6a8430..969e974f9 100644 --- a/web/pandas/fr/navbar.yml +++ b/web/pandas/fr/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Communauté" target: - name: "Blog" From 312d251a2e5c4ae43fa02a02e36f1a940e15f05e Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:23 -0500 Subject: [PATCH 159/184] New translations navbar.yml (Spanish) --- web/pandas/es/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/es/navbar.yml b/web/pandas/es/navbar.yml index ddb621deb..bdcbc36c1 100644 --- a/web/pandas/es/navbar.yml +++ b/web/pandas/es/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentación" target: docs/ + translated: false - name: "Comunidad" target: - name: "Blog" From f9bcfe5e7827c1477fdf67131ad466ebde700919 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:25 -0500 Subject: [PATCH 160/184] New translations navbar.yml (Arabic) --- web/pandas/ar/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/ar/navbar.yml b/web/pandas/ar/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/ar/navbar.yml +++ b/web/pandas/ar/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" 
target: docs/ + translated: false - name: "Community" target: - name: "Blog" From 3acaf8c5ca0126c578ef3633c1a1dfb1ff9033bd Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:26 -0500 Subject: [PATCH 161/184] New translations navbar.yml (Catalan) --- web/pandas/ca/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/ca/navbar.yml b/web/pandas/ca/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/ca/navbar.yml +++ b/web/pandas/ca/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From dd9dd9489297850be8b016f0358f645437304e66 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:27 -0500 Subject: [PATCH 162/184] New translations navbar.yml (Japanese) --- web/pandas/ja/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/ja/navbar.yml b/web/pandas/ja/navbar.yml index 180cdf9ea..bec024a84 100644 --- a/web/pandas/ja/navbar.yml +++ b/web/pandas/ja/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From e1905ddf09dfefbb21f06c447732e3cccaab32a1 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:28 -0500 Subject: [PATCH 163/184] New translations navbar.yml (Korean) --- web/pandas/ko/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/ko/navbar.yml b/web/pandas/ko/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/ko/navbar.yml +++ b/web/pandas/ko/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From 7dd7edbc317a04fb575be33793e6d343ee446ff0 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:29 -0500 Subject: [PATCH 164/184] New translations navbar.yml (Polish) --- web/pandas/pl/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/pl/navbar.yml b/web/pandas/pl/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/pl/navbar.yml +++ b/web/pandas/pl/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From 350e3436aee8b7a2ed69608a27a1a73f2e5e066d Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:30 -0500 Subject: [PATCH 165/184] New translations navbar.yml (Russian) --- web/pandas/ru/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/ru/navbar.yml b/web/pandas/ru/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/ru/navbar.yml +++ b/web/pandas/ru/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From 9cc9fa57e85d593af01afa05b9c70932e9124ebd Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:32 -0500 Subject: [PATCH 166/184] New translations navbar.yml (Chinese Simplified) --- web/pandas/zh/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/zh/navbar.yml b/web/pandas/zh/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/zh/navbar.yml +++ b/web/pandas/zh/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: 
"Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From f7ae2e0a7977aea63444372450d1c00f71682f4f Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:33 -0500 Subject: [PATCH 167/184] New translations navbar.yml (Portuguese, Brazilian) --- web/pandas/pt/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/pt/navbar.yml b/web/pandas/pt/navbar.yml index 52bc79c4e..49e9b4e8b 100644 --- a/web/pandas/pt/navbar.yml +++ b/web/pandas/pt/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentação" target: docs/ + translated: false - name: "Comunidade" target: - name: "Blog" From 8af1f49e245ac44aa4e52ed6947e1e1d667dba1a Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:34 -0500 Subject: [PATCH 168/184] New translations navbar.yml (Persian) --- web/pandas/fa/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/fa/navbar.yml b/web/pandas/fa/navbar.yml index cd7c3d762..f374d0e28 100644 --- a/web/pandas/fa/navbar.yml +++ b/web/pandas/fa/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From 0b3941be5787dca4b25c8ab717c5428e637d998f Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:35 -0500 Subject: [PATCH 169/184] New translations navbar.yml (Tamil) --- web/pandas/ta/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/ta/navbar.yml b/web/pandas/ta/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/ta/navbar.yml +++ b/web/pandas/ta/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From 1b8edbfc99665a01f56d3a31ec94054baee650af Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 19 May 2025 18:10:36 -0500 Subject: [PATCH 170/184] New translations navbar.yml (Hindi) --- web/pandas/hi/navbar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/web/pandas/hi/navbar.yml b/web/pandas/hi/navbar.yml index bcc2d062f..07ff0a643 100644 --- a/web/pandas/hi/navbar.yml +++ b/web/pandas/hi/navbar.yml @@ -17,6 +17,7 @@ navbar: target: getting_started.html - name: "Documentation" target: docs/ + translated: false - name: "Community" target: - name: "Blog" From eaf70232ed3b932ec790e4fc88c16bf770298d79 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 26 May 2025 13:33:41 -0500 Subject: [PATCH 171/184] New translations governance.md (Persian) --- web/pandas/fa/about/governance.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md index b04a307c2..d1f20fa9a 100644 --- a/web/pandas/fa/about/governance.md +++ b/web/pandas/fa/about/governance.md @@ -76,9 +76,7 @@ BDFL می‌تواند جانشین خود را منصوب کند، اما ان - منافع مالی، مانند سرمایه‌گذاری‌ها، اشتغال یا قراردادهای کاری خارج از پروژه که ممکن است بر فعالیت آن‌ها در پروژه تأثیر بگذارد. - دسترسی به اطلاعات اختصاصی کارفرمای خود که ممکن است به طور ناخواسته به فعالیت‌های آن‌ها در پروژه نفوذ کند. -تمام اعضای تیم اصلی، از جمله BDFL، باید هرگونه تعارض منافع احتمالی خود را به سایر اعضای تیم اصلی اطلاع دهند. اعضایی که در یک موضوع خاص تعارض منافع دارند، می‌توانند در بحث‌های تیم اصلی درباره آن موضوع شرکت کنند، اما باید از رأی دادن در مورد آن موضوع خودداری کنند. 
If the BDFL has -recused his/herself for a particular decision, they will appoint a substitute -BDFL for that decision. +تمام اعضای تیم اصلی، از جمله BDFL، باید هرگونه تعارض منافع احتمالی خود را به سایر اعضای تیم اصلی اطلاع دهند. اعضایی که در یک موضوع خاص تعارض منافع دارند، می‌توانند در بحث‌های تیم اصلی درباره آن موضوع شرکت کنند، اما باید از رأی دادن در مورد آن موضوع خودداری کنند. اگر BDFL در یک تصمیم‌گیری خاص از رأی دادن کناره‌گیری کند، برای آن تصمیم یک BDFL جایگزین منصوب خواهد کرد. ### Private communications of the Core Team From df06e741c53af0c753b623404b846dce6b51fa21 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 26 May 2025 14:28:56 -0500 Subject: [PATCH 172/184] New translations index.md (Persian) --- web/pandas/fa/about/index.md | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/web/pandas/fa/about/index.md b/web/pandas/fa/about/index.md index deb04a9c2..bb1d28502 100644 --- a/web/pandas/fa/about/index.md +++ b/web/pandas/fa/about/index.md @@ -1,21 +1,18 @@ -# About pandas +# درباره پانداس -## History of development +## تاریخچهٔ توسعه -In 2008, _pandas_ development began at [AQR Capital Management](https://www.aqr.com). -By the end of 2009 it had been [open sourced](https://en.wikipedia.org/wiki/Open_source), -and is actively supported today by a community of like-minded individuals around the world who -contribute their valuable time and energy to help make open source _pandas_ -possible. Thank you to [all of our contributors](team.html). +در سال ۲۰۰۸، توسعه‌ی _پانداس_ در شرکت [AQR Capital Management](https://www.aqr.com) آغاز شد. +تا پایان سال ۲۰۰۹، این پروژه به‌صورت [متن‌باز](https://en.wikipedia.org/wiki/Open_source) منتشر شد و امروزه به‌طور فعال توسط جامعه‌ای از افراد هم‌فکر در سراسر جهان پشتیبانی می‌شود که زمان و انرژی ارزشمند خود را برای ممکن ساختن توسعه متن‌باز پانداس صرف می‌کنند. سپاس از [همه‌ی مشارکت‌کنندگان ما](team.html). -Since 2015, _pandas_ is a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects). -This will help ensure the success of development of _pandas_ as a world-class open-source project. +از سال ۲۰۱۵، pandas یکی از [پروژه‌های مورد حمایت NumFOCUS](https://numfocus.org/sponsored-projects) بوده است. +این امر به موفقیت توسعه‌ی _پانداس_ به عنوان یک پروژهٔ متن‌باز در سطح جهانی کمک خواهد کرد. -### Timeline +### خط‌زمان -- **2008**: Development of _pandas_ started -- **2009**: _pandas_ becomes open source -- **2012**: First edition of _Python for Data Analysis_ is published +- **۲۰۰۸**: توسعهٔ _پانداس_ آغاز شد +- **۲۰۰۹**: _پانداس_ به‌صورت متن‌باز منتشر شد +- **۲۰۱۲**: نخستین ویرایش _Python for Data Analysis_ منتشر شد - **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects) - **2018**: First in-person core developer sprint From c045758fa1e6d516be1edb0b6b7b4e44f02ec49f Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 26 May 2025 14:28:58 -0500 Subject: [PATCH 173/184] New translations roadmap.md (Persian) --- web/pandas/fa/about/roadmap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/pandas/fa/about/roadmap.md b/web/pandas/fa/about/roadmap.md index bd52e545a..13dcfdf02 100644 --- a/web/pandas/fa/about/roadmap.md +++ b/web/pandas/fa/about/roadmap.md @@ -142,7 +142,7 @@ should result on cleaner, simpler, and more performant code. 7. Use of trial and error should be limited, and anyway restricted to catch only exceptions which are actually expected (typically `KeyError`). 
-- In particular, code should never (intentionally) raise new exceptions in the `except` portion of a `try... exception` +- به‌ویژه، در بخش except از یک بلوک try، کد هرگز نباید (به‌صورت عمدی) excpet جدیدی ایجاد کند... exception\` 8. Any code portion which is not specific to setters and getters must be shared, and when small differences in behavior are expected (e.g. getting with `.loc` raises for From 6d14dda729045e17dc760b1b8d11c7a92ac1da64 Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 26 May 2025 15:30:35 -0500 Subject: [PATCH 174/184] New translations index.md (Persian) --- web/pandas/fa/about/index.md | 78 ++++++++++++++---------------------- 1 file changed, 30 insertions(+), 48 deletions(-) diff --git a/web/pandas/fa/about/index.md b/web/pandas/fa/about/index.md index bb1d28502..f01d0c096 100644 --- a/web/pandas/fa/about/index.md +++ b/web/pandas/fa/about/index.md @@ -13,71 +13,53 @@ - **۲۰۰۸**: توسعهٔ _پانداس_ آغاز شد - **۲۰۰۹**: _پانداس_ به‌صورت متن‌باز منتشر شد - **۲۰۱۲**: نخستین ویرایش _Python for Data Analysis_ منتشر شد -- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects) -- **2018**: First in-person core developer sprint +- **۲۰۱۵**: _پانداس_ به یکی از [پروژه‌های مورد حمایت NumFOCUS](https://numfocus.org/sponsored-projects) تبدیل شد +- **۲۰۱۸**: نخستین گردهمایی حضوری توسعه‌دهندگان اصلی برگزار شد -## Library Highlights +## برجسته‌ترین ویژگی‌های کتابخانه -- A fast and efficient **DataFrame** object for data manipulation with - integrated indexing; +- یک شیء DataFrame سریع و کارآمد برای دست‌کاری داده‌ها با قابلیت نمایه‌گذاری (ایندکس‌گذاری) یکپارچه؛ -- Tools for **reading and writing data** between in-memory data structures and - different formats: CSV and text files, Microsoft Excel, SQL databases, and - the fast HDF5 format; +- ابزارهایی **برای خواندن و نوشتن داده‌ها** بین ساختارهای داده در حافظه و قالب‌های گوناگون مانند: فایل‌های CSV و متنی، فایل‌های Microsoft Excel، پایگاه‌های داده SQL، و قالب سریع HDF5؛ -- Intelligent **data alignment** and integrated handling of **missing data**: - gain automatic label-based alignment in computations and easily manipulate - messy data into an orderly form; +- **هم‌ترازی هوشمندانه‌ی داده‌ها** و مدیریت یکپارچه‌ی **داده‌های گمشده**: + در محاسبات، هم‌ترازی خودکار بر پایه‌ی برچسب به‌دست آورید و داده‌های به‌هم‌ریخته را به‌سادگی به شکلی منظم تبدیل کنید؛ -- Flexible **reshaping** and pivoting of data sets; +- **تغییر شکل** و چرخاندن (Pivot) داده‌ها به‌صورت انعطاف‌پذیر؛ -- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** - of large data sets; +- **برش‌زنی** هوشمند بر پایه‌ی برچسب، **ایندکس‌گذاری پیشرفته** و **زیرمجموعه‌سازی** داده‌های حجیم؛ -- Columns can be inserted and deleted from data structures for **size - mutability**; +- ستون‌ها را می‌توان برای **تغییرپذیری اندازه** به ساختارهای داده‌ای افزود یا از آن‌ها حذف کرد؛ -- Aggregating or transforming data with a powerful **group by** engine - allowing split-apply-combine operations on data sets; +- تجمیع یا دگرگونی داده‌ها با موتور قدرتمند **گروه‌بندی** (group by) که عملیات تقسیم–اعمال–ترکیب (split-apply-combine) را بر روی مجموعه‌های داده‌ای ممکن می‌سازد؛ -- High performance **merging and joining** of data sets; +- **ادغام و پیوند** داده‌ها با کارایی بالا؛ -- **Hierarchical axis indexing** provides an intuitive way of working with - high-dimensional data in a lower-dimensional data structure; +- **ایندکس‌گذاری محوری سلسله‌مراتبی** روشی شهودی برای کار با داده‌های چندبعدی در قالب ساختارهای داده‌ای 
کم‌بعد فراهم می‌کند؛ -- **Time series**-functionality: date range generation and frequency - conversion, moving window statistics, date shifting and lagging. - Even create domain-specific time offsets and join time - series without losing data; +- قابلیت‌های مربوط به **سری‌های زمانی**: تولید بازه‌های زمانی و تبدیل بازه‌ها، آمارگیری با پنجره‌ی متحرک، جابه‌جایی و تأخیر زمانی. + حتی می‌توان افست‌های زمانی خاص دامنه ایجاد کرد و سری‌های زمانی را بدون از دست دادن داده‌ها به هم پیوند داد؛ -- Highly **optimized for performance**, with critical code paths written in - [Cython](https://cython.org) or C. +- به‌شدت **بهینه‌شده برای کارایی**، با مسیرهای حیاتی کد که به زبان [Cython](https://cython.org) یا C نوشته شده‌اند. -- Python with _pandas_ is in use in a wide variety of **academic and - commercial** domains, including Finance, Neuroscience, Economics, - Statistics, Advertising, Web Analytics, and more. +- پایتون همراه با _پانداس_ در گستره‌ی وسیعی از **حوزه‌های دانشگاهی و بازرگانی** به‌کار گرفته می‌شود، از جمله امور مالی، علوم اعصاب، اقتصاد، آمار، تبلیغات، تحلیل وب، و دیگر زمینه‌ها. -## Mission +## ماموریت -_pandas_ aims to be the fundamental high-level building block for doing practical, -real world data analysis in Python. -Additionally, it has the broader goal of becoming the most powerful and flexible -open source data analysis / manipulation tool available in any language. +_پانداس_ بر آن است که به عنوان سنگ‌بنای بنیادین و سطح‌بالا برای تحلیل داده‌های کاربردی و واقعی در پایتون عمل کند. +افزون بر این، هدف گسترده‌تری نیز دارد: تبدیل شدن به نیرومندترین و انعطاف‌پذیرترین ابزار متن‌باز برای تحلیل و پردازش داده در هر زبانی. -## Vision +## چشم‌انداز -A world where data analytics and manipulation software is: +جهانی که در آن نرم‌افزارهای تحلیل و پردازش داده: -- Accessible to everyone -- Free for users to use and modify -- Flexible -- Powerful -- Easy to use -- Fast +- در دسترس همگان باشد؛ +- برای کاربران آزاد باشد تا آن را به‌کار ببرند و دگرگون کنند؛ +- انعطاف‌پذیر باشد؛ +- نیرومند باشد؛ +- ساده و آسان برای استفاده باشد؛ +- سریع باشد؛ -## Values +## ارزش‌ها -Is in the core of _pandas_ to be respectful and welcoming with everybody, -users, contributors and the broader community. Regardless of level of experience, -gender, gender identity and expression, sexual orientation, disability, -personal appearance, body size, race, ethnicity, age, religion, or nationality. +در هسته‌ی _پانداس_ این اصل نهفته است که با همه‌ی کاربران، مشارکت‌کنندگان، و جامعه‌ی گسترده‌تر—با احترام و رویکردی پذیرنده برخورد شود. فارغ از سطح تجربه، جنسیت، هویت و بیان جنسیتی، گرایش جنسی، ناتوانی، ظاهر فردی، اندازه‌ی بدن، نژاد، قومیت، سن، دین یا ملیت. From fb68e572fd11e03bf68ba9303ffe7ab0cc11ceba Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Tue, 27 May 2025 13:58:38 -0500 Subject: [PATCH 175/184] New translations governance.md (Persian) --- web/pandas/fa/about/governance.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md index d1f20fa9a..3fa2f65f2 100644 --- a/web/pandas/fa/about/governance.md +++ b/web/pandas/fa/about/governance.md @@ -78,11 +78,9 @@ BDFL می‌تواند جانشین خود را منصوب کند، اما ان تمام اعضای تیم اصلی، از جمله BDFL، باید هرگونه تعارض منافع احتمالی خود را به سایر اعضای تیم اصلی اطلاع دهند. اعضایی که در یک موضوع خاص تعارض منافع دارند، می‌توانند در بحث‌های تیم اصلی درباره آن موضوع شرکت کنند، اما باید از رأی دادن در مورد آن موضوع خودداری کنند. 
اگر BDFL در یک تصمیم‌گیری خاص از رأی دادن کناره‌گیری کند، برای آن تصمیم یک BDFL جایگزین منصوب خواهد کرد.

-### Private communications of the Core Team
+### ارتباط‌های خصوصی تیم اصلی

-Unless specifically required, all Core Team discussions and activities will be
-public and done in collaboration and discussion with the Project Contributors
-and Community. The Core Team will have a private mailing list that will be used
+مگر در مواردی که ضرورت خاصی وجود داشته باشد، تمام گفت‌وگوها و فعالیت‌های تیم اصلی به‌صورت عمومی و با همکاری و مشارکت مشارکت‌کنندگان پروژه و جامعه انجام خواهد شد. The Core Team will have a private mailing list that will be used
 sparingly and only when a specific matter requires privacy. When private
 communications and decisions are needed, the Core Team will do its best to
 summarize those to the Community after eliding personal/private/sensitive
 information that should not be posted to the public internet.

From e055cf9ce54e066b63d7433f5e1de11c84e72adb Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Tue, 27 May 2025 15:00:05 -0500
Subject: [PATCH 176/184] New translations governance.md (Persian)

---
 web/pandas/fa/about/governance.md | 32 +++++++++----------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md
index 3fa2f65f2..ac004c480 100644
--- a/web/pandas/fa/about/governance.md
+++ b/web/pandas/fa/about/governance.md
@@ -80,29 +80,15 @@ BDFL می‌تواند جانشین خود را منصوب کند، اما ان

 ### ارتباط‌های خصوصی تیم اصلی

-مگر در مواردی که ضرورت خاصی وجود داشته باشد، تمام گفت‌وگوها و فعالیت‌های تیم اصلی به‌صورت عمومی و با همکاری و مشارکت مشارکت‌کنندگان پروژه و جامعه انجام خواهد شد. The Core Team will have a private mailing list that will be used
-sparingly and only when a specific matter requires privacy. When private
-communications and decisions are needed, the Core Team will do its best to
-summarize those to the Community after eliding personal/private/sensitive
-information that should not be posted to the public internet.
-
-### Subcommittees
-
-The Core Team can create subcommittees that provide leadership and guidance for
-specific aspects of the project. Like the Core Team as a whole, subcommittees
-should conduct their business in an open and public manner unless privacy is
-specifically called for. Private subcommittee communications should happen on
-the main private mailing list of the Core Team unless specifically called for.
-
-Question: if the BDFL is not on a subcommittee, do they still have override
-authority?
-
-Suggestion: they do, but they should appoint a delegate who plays that role
-most of the time, and explicit BDFL intervention is sought only if the
-committee disagrees with that delegate’s decision and no resolution is possible
-within the team. This is different from a BDFL delegate for a specific decision
-(or a recusal situation), where the BDFL is literally giving up his/her
-authority to someone else in full. It’s more like what Linus Torvalds uses with his
+مگر در مواردی که ضرورت خاصی وجود داشته باشد، تمام گفت‌وگوها و فعالیت‌های تیم اصلی به‌صورت عمومی و با همکاری و مشارکت مشارکت‌کنندگان پروژه و جامعه انجام خواهد شد. تیم اصلی یک فهرست پستی خصوصی خواهد داشت که تنها در موارد محدود و زمانی که موضوعی نیاز به محرمانگی داشته باشد، از آن استفاده خواهد شد. زمانی که ارتباط‌ها و تصمیم‌گیری‌های خصوصی لازم باشند، تیم اصلی تمام تلاش خود را خواهد کرد تا پس از حذف اطلاعات شخصی، خصوصی یا حساس که نباید به‌صورت عمومی در اینترنت منتشر شوند، خلاصه‌ای از آن‌ها را با جامعه در میان بگذارد.
+
+### زیرکمیته‌ها
+
+تیم اصلی می‌تواند زیرکمیته‌هایی تشکیل دهد که برای بخش‌های خاصی از پروژه، نقش رهبری و هدایت ایفا کنند. همانند تیم اصلی به‌طور کلی، زیرکمیته‌ها نیز باید فعالیت‌های خود را به‌صورت باز و عمومی انجام دهند، مگر آنکه محرمانگی به‌طور مشخص لازم باشد. ارتباط‌های خصوصی زیرکمیته‌ها باید از طریق فهرست پستی خصوصی اصلی تیم اصلی انجام شوند، مگر آنکه به‌طور خاص روش دیگری لازم باشد.
+
+پرسش: اگر BDFL عضو یک زیرکمیته نباشد، آیا همچنان اختیار نهایی را دارد؟
+
+پیشنهاد: این اختیار را دارند، اما بهتر است نماینده‌ای را تعیین کنند که بیشترِ مواقع این نقش را ایفا کند، و مداخله‌ی مستقیم BDFL تنها زمانی خواسته شود که کمیته با تصمیم آن نماینده مخالف باشد و هیچ راه‌حلی در درون تیم امکان‌پذیر نباشد. این با نماینده‌ی BDFL برای یک تصمیم خاص (یا در وضعیت کناره‌گیری) متفاوت است، جایی که BDFL عملاً تمام اختیار خود را به‌طور کامل به فرد دیگری واگذار می‌کند. It’s more like what Linus Torvalds uses with his
 “lieutenants” model.

 ### NumFOCUS Subcommittee

From 05a691dabe8545989522b5766d8737113a87876f Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Wed, 28 May 2025 08:29:16 -0500
Subject: [PATCH 177/184] New translations governance.md (Persian)

---
 web/pandas/fa/about/governance.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md
index ac004c480..f9e3fcfd3 100644
--- a/web/pandas/fa/about/governance.md
+++ b/web/pandas/fa/about/governance.md
@@ -88,10 +88,9 @@ BDFL می‌تواند جانشین خود را منصوب کند، اما ان

 پرسش: اگر BDFL عضو یک زیرکمیته نباشد، آیا همچنان اختیار نهایی را دارد؟

-پیشنهاد: این اختیار را دارند، اما بهتر است نماینده‌ای را تعیین کنند که بیشترِ مواقع این نقش را ایفا کند، و مداخله‌ی مستقیم BDFL تنها زمانی خواسته شود که کمیته با تصمیم آن نماینده مخالف باشد و هیچ راه‌حلی در درون تیم امکان‌پذیر نباشد. این با نماینده‌ی BDFL برای یک تصمیم خاص (یا در وضعیت کناره‌گیری) متفاوت است، جایی که BDFL عملاً تمام اختیار خود را به‌طور کامل به فرد دیگری واگذار می‌کند. It’s more like what Linus Torvalds uses with his
-“lieutenants” model.
+پیشنهاد: این اختیار را دارند، اما بهتر است نماینده‌ای را تعیین کنند که بیشترِ مواقع این نقش را ایفا کند، و مداخله‌ی مستقیم BDFL تنها زمانی خواسته شود که کمیته با تصمیم آن نماینده مخالف باشد و هیچ راه‌حلی در درون تیم امکان‌پذیر نباشد. این با نماینده‌ی BDFL برای یک تصمیم خاص (یا در وضعیت کناره‌گیری) متفاوت است، جایی که BDFL عملاً تمام اختیار خود را به‌طور کامل به فرد دیگری واگذار می‌کند. این بیشتر شبیه مدلی است که لینوس توروالدز با «دستیاران» خود به کار می‌برد.

-### NumFOCUS Subcommittee
+### زیرکمیته‌ی NumFOCUS

 The Core Team will maintain one narrowly focused subcommittee to manage its
 interactions with NumFOCUS.

From 6d10afeb94898a4289470163b3ada1876e8e105d Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Wed, 28 May 2025 09:27:19 -0500
Subject: [PATCH 178/184] New translations governance.md (Persian)

---
 web/pandas/fa/about/governance.md | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md
index f9e3fcfd3..e2e5e8c11 100644
--- a/web/pandas/fa/about/governance.md
+++ b/web/pandas/fa/about/governance.md
@@ -92,27 +92,15 @@ BDFL می‌تواند جانشین خود را منصوب کند، اما ان

 ### زیرکمیته‌ی NumFOCUS

-The Core Team will maintain one narrowly focused subcommittee to manage its
-interactions with NumFOCUS.
-
-- The NumFOCUS Subcommittee is comprised of at least 5 persons who manage
-  project funding that comes through NumFOCUS. It is expected that these funds
-  will be spent in a manner that is consistent with the non-profit mission of
-  NumFOCUS and the direction of the Project as determined by the full Core
-  Team.
-- This Subcommittee shall NOT make decisions about the direction, scope or
-  technical direction of the Project.
-- This Subcommittee will have at least 5 members. No more than 2 Subcommittee
-  Members can report to one person (either directly or indirectly) through
-  employment or contracting work (including the reportee, i.e. the reportee + 1
-  is the max). This avoids effective majorities resting on one person.
-
-## Institutional Partners and Funding
-
-The BDFL and Core Team are the primary leadership for the project. No outside
-institution, individual or legal entity has the ability to own, control, usurp
-or influence the project other than by participating in the Project as
-Contributors and Core Team. However, because institutions are the primary
+تیم اصلی یک زیرکمیته‌ی متمرکز تشکیل خواهد داد تا تعاملات خود با NumFOCUS را مدیریت کند.
+
+- زیرکمیته‌ی NumFOCUS دست‌کم از ۵ نفر تشکیل شده است که مدیریت منابع مالی پروژه که از طریق NumFOCUS تأمین می‌شود را بر عهده دارند. انتظار می‌رود که این منابع مالی در راستای مأموریت غیرانتفاعی NumFOCUS و جهت‌گیری پروژه آن‌گونه که توسط کل تیم اصلی تعیین می‌شود هزینه شوند.
+- این زیرکمیته نباید درباره‌ی جهت‌گیری، دامنه یا مسیر فنی پروژه تصمیم‌گیری کند.
+- این زیرکمیته دست‌کم ۵ عضو خواهد داشت. بیش از ۲ عضو از زیرکمیته نباید (چه به‌صورت مستقیم و چه غیرمستقیم) از طریق اشتغال یا قرارداد کاری به یک نفر گزارش دهند (با احتساب فرد گزارش‌دهنده؛ یعنی حداکثر باید گزارش‌دهنده + ۱ نفر باشد). این کار از ایجاد اکثریت مؤثر تحت نفوذ یک فرد جلوگیری می‌کند.
+
+## شرکای نهادی و تأمین مالی
+
+BDFL و تیم اصلی رهبران اصلی پروژه هستند. هیچ نهاد بیرونی، فرد یا شخصیت حقوقی، جز از راه مشارکت در پروژه به‌عنوان مشارکت‌کننده یا عضو تیم اصلی، نمی‌تواند مالکیت، کنترل، سلطه یا تأثیری بر پروژه داشته باشد. However, because institutions are the primary
 funding mechanism for the project, it is important to formally acknowledge
 institutional participation in the project. These are Institutional Partners.

From ab43e9622a4998ac962afe9c449045fd994bc212 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Sun, 1 Jun 2025 13:29:37 -0500
Subject: [PATCH 179/184] New translations governance.md (Persian)

---
 web/pandas/fa/about/governance.md | 58 ++++++++----------------
 1 file changed, 15 insertions(+), 43 deletions(-)

diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md
index e2e5e8c11..9b02e5da8 100644
--- a/web/pandas/fa/about/governance.md
+++ b/web/pandas/fa/about/governance.md
@@ -100,49 +100,21 @@ BDFL می‌تواند جانشین خود را منصوب کند، اما ان

 ## شرکای نهادی و تأمین مالی

-BDFL و تیم اصلی رهبران اصلی پروژه هستند. هیچ نهاد بیرونی، فرد یا شخصیت حقوقی، جز از راه مشارکت در پروژه به‌عنوان مشارکت‌کننده یا عضو تیم اصلی، نمی‌تواند مالکیت، کنترل، سلطه یا تأثیری بر پروژه داشته باشد. However, because institutions are the primary
-funding mechanism for the project, it is important to formally acknowledge
-institutional participation in the project. These are Institutional Partners.
-
-An Institutional Contributor is any individual Project Contributor who
-contributes to the project as part of their official duties at an Institutional
-Partner. Likewise, an Institutional Core Team Member is any Core Team Member
-who contributes to the project as part of their official duties at an
-Institutional Partner.
-
-With these definitions, an Institutional Partner is any recognized legal entity
-in the United States or elsewhere that employs at least one Institutional
-Contributor or Institutional Core Team Member. Institutional Partners can be
-for-profit or non-profit entities.
-
-Institutions become eligible to become an Institutional Partner by employing
-individuals who actively contribute to The Project as part of their official
-duties. To state this another way, the only way for an Institutional Partner to
-influence the project is by actively contributing to the open development of
-the project, on equal terms with any other member of the community of
-Contributors and Core Team Members. Merely using pandas Software or Services in
-an institutional context does not allow an entity to become an Institutional
-Partner. Financial gifts do not enable an entity to become an Institutional
-Partner. Once an institution becomes eligible for Institutional Partnership,
-the Core Team must nominate and approve the Partnership.
-
-If an existing Institutional Partner no longer has a contributing employee,
-they will be given a one-year grace period for other employees to begin
-contributing.
-
-شریک نهادی آزاد است که برای فعالیت‌های خود در پروژه از هر طریق قانونی تأمین مالی کند. این می‌تواند شامل یک سازمان غیرانتفاعی باشد که از بنیادهای خصوصی و اهداکنندگان پول جمع‌آوری می‌کند، یا یک شرکت انتفاعی که محصولات و خدمات اختصاصی ایجاد می‌کند و از نرم‌افزارها و خدمات پروژه بهره می‌برد. Funding acquired by Institutional Partners to work on The Project is
-called Institutional Funding. However, no funding obtained by an Institutional
-Partner can override The Project BDFL and Core Team. If a Partner has funding
-to do pandas work and the Core Team decides to not pursue that work as a
-project, the Partner is free to pursue it on their own. However in this
-situation, that part of the Partner’s work will not be under the pandas
-umbrella and cannot use the Project trademarks in a way that suggests a formal
-relationship.
-
-To acknowledge institutional contributions, there are two levels of
-Institutional Partners, with associated benefits:
-
-**Tier 1** = an institution with at least one Institutional Core Team Member
+BDFL و تیم اصلی رهبران اصلی پروژه هستند. هیچ نهاد بیرونی، فرد یا شخصیت حقوقی، جز از راه مشارکت در پروژه به‌عنوان مشارکت‌کننده یا عضو تیم اصلی، نمی‌تواند مالکیت، کنترل، سلطه یا تأثیری بر پروژه داشته باشد. با این حال، از آنجا که نهادها سازوکار اصلی تأمین مالی پروژه هستند، شایسته است که مشارکت نهادی در پروژه به‌صورت رسمی به رسمیت شناخته شود. اینها شرکای نهادی هستند.
+
+مشارکت‌کننده‌ی نهادی به هر مشارکت‌کننده‌ی پروژه گفته می‌شود که در چارچوب وظایف رسمی خود در یک شریک نهادی به پروژه کمک می‌کند. به همین ترتیب، عضو نهادی تیم اصلی به هر عضوی از تیم اصلی گفته می‌شود که در چارچوب وظایف رسمی خود در یک شریک نهادی به پروژه کمک می‌کند.
+
+با این تعاریف، شریک نهادی هر نهاد قانونی شناخته‌شده در ایالات متحده یا سایر کشورهاست که دست‌کم یک مشارکت‌کننده‌ی نهادی یا عضو نهادی تیم اصلی را در استخدام خود دارد. شرکای نهادی می‌توانند نهادهایی انتفاعی یا غیرانتفاعی باشند.
+
+نهادها زمانی واجد شرایط تبدیل شدن به شریک نهادی می‌شوند که افرادی را در استخدام خود داشته باشند که به‌طور فعال و در چارچوب وظایف رسمی‌شان در پروژه مشارکت می‌کنند. به‌بیان دیگر، تنها راهی که یک شریک نهادی می‌تواند بر پروژه اثر بگذارد، مشارکت فعال در توسعه‌ی باز پروژه است، آن‌هم با شرایطی برابر با هر عضو دیگر از جامعه‌ی مشارکت‌کنندگان و تیم اصلی. صرفاً استفاده از نرم‌افزارها یا خدمات پانداس در یک زمینه‌ی نهادی، موجب نمی‌شود که آن نهاد به‌عنوان شریک نهادی شناخته شود. هدایای مالی نیز باعث نخواهند شد که یک نهاد به شریک نهادی تبدیل شود. زمانی که یک نهاد واجد شرایط شراکت نهادی شود، تیم اصلی باید آن شراکت را نامزد کرده و مورد تأیید قرار دهد.
+
+اگر یک شریک نهادی موجود دیگر هیچ کارمند مشارکت‌کننده‌ای نداشته باشد، به آن نهاد یک مهلت یک‌ساله داده خواهد شد تا کارمندان دیگری مشارکت در پروژه را آغاز کنند.
+
+شریک نهادی آزاد است که برای فعالیت‌های خود در پروژه از هر طریق قانونی تأمین مالی کند. این می‌تواند شامل یک سازمان غیرانتفاعی باشد که از بنیادهای خصوصی و اهداکنندگان پول جمع‌آوری می‌کند، یا یک شرکت انتفاعی که محصولات و خدمات اختصاصی ایجاد می‌کند و از نرم‌افزارها و خدمات پروژه بهره می‌برد. منابع مالی‌ای که شرکای نهادی برای کار روی پروژه به دست می‌آورند، تأمین مالی نهادی نامیده می‌شود. با این حال، هیچ‌گونه تأمین مالی‌ای که توسط یک شریک نهادی به دست آمده باشد، نمی‌تواند اختیار BDFL و تیم اصلی پروژه را نادیده بگیرد. اگر یک شریک نهادی برای انجام کار بر روی پانداس بودجه‌ای داشته باشد اما تیم اصلی تصمیم بگیرد آن کار را به عنوان بخشی از پروژه دنبال نکند، آن شریک می‌تواند آزادانه آن کار را به صورت مستقل پیگیری کند. با این حال، در چنین وضعیتی، آن بخش از کار شریک مشمول پوشش رسمی پانداس نخواهد بود و نمی‌تواند از علائم تجاری پروژه به گونه‌ای استفاده کند که نشان‌دهنده‌ی رابطه‌ای رسمی باشد.
+
+برای به رسمیت شناختن مشارکت‌های نهادی، دو سطح از شرکای نهادی تعریف شده است که هر یک با مزایایی همراه هستند:
+
+**سطح ۱** = نهادی که دست‌کم یک عضو نهادی تیم اصلی داشته باشد؛

 - Acknowledged on the pandas website, in talks and T-shirts.
 - Ability to acknowledge their own funding sources on the pandas website, in
   talks and T-shirts.
 - Ability to influence the project through the participation of their Core Team
   Member.

 **Tier 2** = an institution with at least one Institutional Contributor

From 88869774f1e662e2883d6186a83b513c6e46c19f Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Sun, 1 Jun 2025 14:31:09 -0500
Subject: [PATCH 180/184] New translations governance.md (Persian)

---
 web/pandas/fa/about/governance.md | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/web/pandas/fa/about/governance.md b/web/pandas/fa/about/governance.md
index 9b02e5da8..fdbccedbf 100644
--- a/web/pandas/fa/about/governance.md
+++ b/web/pandas/fa/about/governance.md
@@ -114,23 +114,19 @@

 برای به رسمیت شناختن مشارکت‌های نهادی، دو سطح از شرکای نهادی تعریف شده است که هر یک با مزایایی همراه هستند:

-**سطح ۱** = نهادی که دست‌کم یک عضو نهادی تیم اصلی داشته باشد؛
+**سطح ۱** = نهادی که دست‌کم یک عضو نهادی تیم اصلی داشته باشد

-- Acknowledged on the pandas website, in talks and T-shirts.
-- Ability to acknowledge their own funding sources on the pandas website, in
-  talks and T-shirts.
-- Ability to influence the project through the participation of their Core Team
-  Member.
+- در وب‌سایت پانداس، سخنرانی‌ها و تی‌شرت‌ها مورد قدردانی قرار می‌گیرد.
+- امکان قدردانی از منابع مالی خود در وب‌سایت پانداس، سخنرانی‌ها و تی‌شرت‌ها.
+- توانایی تأثیرگذاری بر پروژه از طریق مشارکت عضو تیم اصلی خود.

-**Tier 2** = an institution with at least one Institutional Contributor
+**سطح ۲** = نهادی که دست‌کم یک مشارکت‌کننده‌ی نهادی داشته باشد

-## Breach
+## تخلّف

-Non-compliance with the terms of the governance documents shall be reported to
-the Core Team either through public or private channels as deemed appropriate.
+عدم پایبندی به مفاد اسناد حاکمیتی باید از طریق کانال‌های عمومی یا خصوصی، بسته به شرایط، به تیم اصلی گزارش داده شود.
-
-## Changing the Governance
+## تغییر در حاکمیت

-Changes to the governance are submitted via a GitHub pull request to The Project's
-[governance page](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md).
+تغییرات در حاکمیت از طریق یک درخواست ادغام (Pull Request) در گیت‌هاب به [صفحه‌ی حاکمیت پروژه](https://github.com/pandas-dev/pandas/blob/main/web/pandas/about/governance.md) ارسال می‌شوند. درخواست ادغام سپس در پاسخ به نظرات عمومی و بازبینی‌ها بهبود می‌یابد، با این هدف که به هم‌رأیی در جامعه برسد. پس از این دوره باز، یکی از اعضای تیم اصلی به تیم اصلی پیشنهاد می‌دهد که تغییرات تأیید شده و درخواست ادغام پذیرفته شود (قبول تغییرات پیشنهادی) یا پیشنهاد می‌دهد که درخواست ادغام بدون ادغام بسته شود (رد تغییرات پیشنهادی). عضو باید هش نهایی کامیت را در درخواست ادغام پیشنهادی برای پذیرش یا رد ذکر کرده و به‌طور خلاصه درخواست ادغام را توضیح دهد. حداقل ۸۰٪ از اعضای تیم اصلی باید رأی دهند و دست‌کم دو سوم از آراء باید مثبت باشد تا اقدام پیشنهادی انجام شود (کسری آراء به نزدیک‌ترین عدد صحیح گرد می‌شود). از آنجا که BDFL دارای اختیار نهایی در پروژه است، این حق را دارد که به‌تنهایی تغییرات را بپذیرد یا رد کند و یا تصمیمات تیم اصلی را نادیده بگیرد.

From 7c2a55921e30c4b6df9415bf2dbeb2c20706ed79 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Sun, 1 Jun 2025 14:31:10 -0500
Subject: [PATCH 181/184] New translations roadmap.md (Persian)

---
 web/pandas/fa/about/roadmap.md | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/web/pandas/fa/about/roadmap.md b/web/pandas/fa/about/roadmap.md
index 13dcfdf02..8abc3505f 100644
--- a/web/pandas/fa/about/roadmap.md
+++ b/web/pandas/fa/about/roadmap.md
@@ -1,9 +1,6 @@
-# Roadmap
+# نقشه راه

-This page provides an overview of the major themes in pandas'
-development. Each of these items requires a relatively large amount of
-effort to implement. These may be achieved more quickly with dedicated
-funding or interest from contributors.
+این صفحه نمایی کلی از محورهای اصلی در توسعه‌ی پانداس ارائه می‌دهد. اجرای هر یک از این موارد به تلاش نسبتاً زیادی نیاز دارد. دستیابی به این اهداف ممکن است با تأمین مالی اختصاصی یا علاقه‌ی مشارکت‌کنندگان سریع‌تر انجام شود.

 An item being on the roadmap does not mean that it will _necessarily_
 happen, even with unlimited funding. During the implementation period we

From 097a44b2ffe0fa32cbc419689d41684fbba886b6 Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Wed, 1 Oct 2025 20:26:54 -0500
Subject: [PATCH 182/184] New translations ecosystem.md (Spanish)

---
 web/pandas/es/community/ecosystem.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/pandas/es/community/ecosystem.md b/web/pandas/es/community/ecosystem.md
index 801df5f5a..bc17bd5eb 100644
--- a/web/pandas/es/community/ecosystem.md
+++ b/web/pandas/es/community/ecosystem.md
@@ -278,7 +278,7 @@ import pandas as pd
 arctic = adb.Arctic("lmdb://arcticdb_test")
 ```

-> **Nota:** ArcticDB admite cualquier almacenamiento compatible con el API S3, incluido AWS. ArcticDB también permite el almacenamiento en Azure Blob.\
+> **Nota:** ArcticDB admite cualquier almacenamiento compatible con el API S3, incluido AWS. ArcticDB también permite el almacenamiento en Azure Blob.
 > ArcticDB también soporta LMDB para almacenamiento local/basado en archivos; para usar LMDB, utilice una ruta de LMDB como un URI: `adb.Arctic('lmdb://path/to/desired/database')`.
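The LMDB URI in the note above can be exercised end to end. A minimal sketch, assuming `arcticdb` is installed; the `./arcticdb_demo` path and the `demo`/`prices` names are illustrative only and are not part of the patch:

```python
import arcticdb as adb
import pandas as pd

# Local, file-based storage through an LMDB URI, as the note describes.
ac = adb.Arctic("lmdb://./arcticdb_demo")
lib = ac.get_library("demo", create_if_missing=True)

# Round-trip a small DataFrame through the library.
df = pd.DataFrame(
    {"price": [1.0, 2.5, 3.0]},
    index=pd.date_range("2024-01-01", periods=3),
)
lib.write("prices", df)         # versioned write
print(lib.read("prices").data)  # read back as a pandas DataFrame
```

Because LMDB storage is file-based, data written this way should persist under the given path across interpreter sessions.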
#### Configuración de biblioteca

From 4136cf0d734c8a2808b90ec0742a18325cda03fd Mon Sep 17 00:00:00 2001
From: Scientific Python Translations
Date: Mon, 6 Oct 2025 19:21:00 -0500
Subject: [PATCH 183/184] New translations index.html (French)

---
 web/pandas/fr/index.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/pandas/fr/index.html b/web/pandas/fr/index.html
index 2a080a95b..cb795503a 100644
--- a/web/pandas/fr/index.html
+++ b/web/pandas/fr/index.html
@@ -10,7 +10,7 @@

pandas

conçu à partir du langage de programmation Python.

- Installer pandas maintenant !
+ Installez pandas maintenant !

From f2f06857ca0a3555280da922caf6f5a53ba6054c Mon Sep 17 00:00:00 2001 From: Scientific Python Translations Date: Mon, 6 Oct 2025 20:38:56 -0500 Subject: [PATCH 184/184] New translations citing.md (French) --- web/pandas/fr/about/citing.md | 59 ++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/web/pandas/fr/about/citing.md b/web/pandas/fr/about/citing.md index 7192446c6..0ef1f1977 100644 --- a/web/pandas/fr/about/citing.md +++ b/web/pandas/fr/about/citing.md @@ -5,37 +5,37 @@ If you use _pandas_ for a scientific publication, we would appreciate citations to the published software and the following paper: -- pandas on Zenodo, - Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author - list from there with "The pandas development team" like in the example below. - - ``` - @software{reback2020pandas, - author = {The pandas development team}, - title = {pandas-dev/pandas: Pandas}, - month = feb, - year = 2020, - publisher = {Zenodo}, - version = {latest}, - doi = {10.5281/zenodo.3509134}, - url = {https://doi.org/10.5281/zenodo.3509134} - } - ``` +- [pandas on Zenodo](https://zenodo.org/search?page=1&size=20&q=conceptrecid%3A%223509134%22&sort=-version&all_versions=True), + Please find us on Zenodo and replace with the citation for the version you are using. You can replace the full author + list from there with "The pandas development team" like in the example below. + + ``` + @software{reback2020pandas, + author = {The pandas development team}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} + } + ``` - [Data structures for statistical computing in python](https://pub.curvenote.com/01908378-3686-7168-a380-d82bbf21c799/public/mckinney-57fc0d4e8a08cd7f26a4b8bf468a71f4.pdf), - McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. - - ``` - @InProceedings{ mckinney-proc-scipy-2010, - author = { {W}es {M}c{K}inney }, - title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, - booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, - pages = { 56 - 61 }, - year = { 2010 }, - editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, - doi = { 10.25080/Majora-92bf1922-00a } - } - ``` + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + ``` + @InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } + } + ``` ## Brand and logo @@ -86,6 +86,7 @@ The official logos of _pandas_ are: The pandas logo is available in full color and white accent. The full color logo should only appear against white backgrounds. +Le logo blanc doit être utilisé sur un fond de couleur contrastée. When using the logo, please follow the next directives: