Skip to content

Commit 6c08af5

Browse files
derrickburns and claude committed
feat: Add Maven Central and PyPI publishing infrastructure
Publishing:
- Add Maven Central publishing workflow (publish.yml)
- Add PyPI publishing workflow (publish-pypi.yml)
- Add PUBLISHING.md with complete setup instructions
- Configure Sonatype settings in build.sbt
- Add spark-packages.json for Spark Packages registry

Documentation:
- Add "Choose the Right Divergence" guide with decision tree
- Add "Comparison with MLlib" explaining when to use each
- Update version to 0.7.0 across all files

This enables one-line installation:
- sbt: libraryDependencies += "com.massivedatascience" %% "massivedatascience-clusterer" % "0.7.0"
- pip: pip install massivedatascience-clusterer

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent c95d5c2 commit 6c08af5

File tree

13 files changed

+808
-14
lines changed

13 files changed

+808
-14
lines changed

.github/workflows/publish-pypi.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: Publish to PyPI
2+
3+
on:
4+
release:
5+
types: [published]
6+
workflow_dispatch:
7+
inputs:
8+
version:
9+
description: 'Version to publish (e.g., 0.7.0)'
10+
required: true
11+
type: string
12+
13+
jobs:
14+
build-and-publish:
15+
runs-on: ubuntu-latest
16+
name: Build and Publish Python Package
17+
18+
steps:
19+
- name: Checkout code
20+
uses: actions/checkout@v4
21+
22+
- name: Set up Python
23+
uses: actions/setup-python@v5
24+
with:
25+
python-version: '3.11'
26+
27+
- name: Install build dependencies
28+
run: |
29+
python -m pip install --upgrade pip
30+
pip install build twine
31+
32+
- name: Build package
33+
working-directory: python
34+
run: |
35+
python -m build
36+
37+
- name: Check package
38+
working-directory: python
39+
run: |
40+
twine check dist/*
41+
42+
- name: Publish to PyPI
43+
working-directory: python
44+
env:
45+
TWINE_USERNAME: __token__
46+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
47+
run: |
48+
twine upload dist/*

.github/workflows/publish.yml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
name: Publish to Maven Central
2+
3+
on:
4+
release:
5+
types: [published]
6+
workflow_dispatch:
7+
inputs:
8+
version:
9+
description: 'Version to publish (e.g., 0.7.0)'
10+
required: true
11+
type: string
12+
13+
jobs:
14+
publish:
15+
runs-on: ubuntu-latest
16+
name: Publish to Maven Central
17+
18+
steps:
19+
- name: Checkout code
20+
uses: actions/checkout@v4
21+
22+
- name: Set up JDK 17
23+
uses: actions/setup-java@v4
24+
with:
25+
java-version: '17'
26+
distribution: 'temurin'
27+
28+
- name: Set up sbt
29+
uses: sbt/setup-sbt@v1
30+
31+
- name: Import GPG key
32+
uses: crazy-max/ghaction-import-gpg@v6
33+
with:
34+
gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}
35+
passphrase: ${{ secrets.GPG_PASSPHRASE }}
36+
37+
- name: Publish Scala 2.13 artifacts
38+
env:
39+
SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }}
40+
SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }}
41+
PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
42+
run: |
43+
sbt ++2.13.14 \
44+
'set credentials += Credentials("Sonatype Nexus Repository Manager", "s01.oss.sonatype.org", sys.env("SONATYPE_USERNAME"), sys.env("SONATYPE_PASSWORD"))' \
45+
'set pgpPassphrase := sys.env.get("PGP_PASSPHRASE").map(_.toCharArray)' \
46+
clean compile publishSigned
47+
48+
- name: Publish Scala 2.12 artifacts
49+
env:
50+
SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }}
51+
SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }}
52+
PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
53+
run: |
54+
sbt ++2.12.18 \
55+
'set credentials += Credentials("Sonatype Nexus Repository Manager", "s01.oss.sonatype.org", sys.env("SONATYPE_USERNAME"), sys.env("SONATYPE_PASSWORD"))' \
56+
'set pgpPassphrase := sys.env.get("PGP_PASSPHRASE").map(_.toCharArray)' \
57+
clean compile publishSigned
58+
59+
- name: Release to Maven Central
60+
env:
61+
SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }}
62+
SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }}
63+
run: |
64+
sbt \
65+
'set credentials += Credentials("Sonatype Nexus Repository Manager", "s01.oss.sonatype.org", sys.env("SONATYPE_USERNAME"), sys.env("SONATYPE_PASSWORD"))' \
66+
sonatypeBundleRelease

PUBLISHING.md

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
# Publishing Guide
2+
3+
How to publish releases to Maven Central and PyPI.
4+
5+
## Prerequisites
6+
7+
### Maven Central (Sonatype)
8+
9+
1. **Create Sonatype Account**
10+
- Register at https://issues.sonatype.org
11+
- Create a new project ticket for `com.massivedatascience` namespace
12+
- Wait for approval (usually 1-2 business days)
13+
14+
2. **Generate GPG Key**
15+
```bash
16+
# Generate key
17+
gpg --full-generate-key
18+
# Choose: RSA and RSA, 4096 bits, key does not expire
19+
20+
# List keys to get key ID
21+
gpg --list-secret-keys --keyid-format LONG
22+
23+
# Export private key (for GitHub Actions)
24+
gpg --armor --export-secret-keys YOUR_KEY_ID > private-key.asc
25+
26+
# Upload public key to keyserver
27+
gpg --keyserver keyserver.ubuntu.com --send-keys YOUR_KEY_ID
28+
```
29+
30+
3. **Add GitHub Secrets**
31+
Go to: Repository → Settings → Secrets and variables → Actions
32+
33+
Add these secrets:
34+
| Secret Name | Value |
35+
|-------------|-------|
36+
| `SONATYPE_USERNAME` | Your Sonatype username |
37+
| `SONATYPE_PASSWORD` | Your Sonatype password |
38+
| `GPG_PRIVATE_KEY` | Contents of `private-key.asc` |
39+
| `GPG_PASSPHRASE` | Your GPG key passphrase |
40+
41+
### PyPI
42+
43+
1. **Create PyPI Account**
44+
- Register at https://pypi.org/account/register/
45+
46+
2. **Create API Token**
47+
- Go to https://pypi.org/manage/account/token/
48+
- Create a token scoped either to "Entire account" or to a specific project
49+
50+
3. **Add GitHub Secret**
51+
| Secret Name | Value |
52+
|-------------|-------|
53+
| `PYPI_API_TOKEN` | Your PyPI API token (starts with `pypi-`) |
54+
55+
---
56+
57+
## Publishing a Release
58+
59+
### Option 1: GitHub Release (Recommended)
60+
61+
1. **Update version numbers**
62+
```bash
63+
# Scala version
64+
echo 'version := "0.7.0"' > version.sbt
65+
66+
# Python version (in python/massivedatascience/__init__.py)
67+
# __version__ = "0.7.0"
68+
69+
# Python version (in python/pyproject.toml)
70+
# version = "0.7.0"
71+
```
72+
73+
2. **Commit and tag**
74+
```bash
75+
git add -A
76+
git commit -m "Release v0.7.0"
77+
git tag v0.7.0
78+
git push origin master --tags
79+
```
80+
81+
3. **Create GitHub Release**
82+
- Go to Releases → Draft a new release
83+
- Choose the tag `v0.7.0`
84+
- Add release notes
85+
- Click "Publish release"
86+
87+
4. **Automatic publishing**
88+
- The `publish.yml` workflow publishes to Maven Central
89+
- The `publish-pypi.yml` workflow publishes to PyPI
90+
91+
### Option 2: Manual Trigger
92+
93+
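Go to Actions → select the workflow (`Publish to Maven Central` or `Publish to PyPI`) → Run workflow, and fill in the required `version` input (e.g., `0.7.0`) when prompted.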
94+
95+
---
96+
97+
## Verifying Publication
98+
99+
### Maven Central
100+
101+
After release (may take 10-30 minutes to sync):
102+
103+
```bash
104+
# Check Maven Central
105+
curl -s "https://search.maven.org/solrsearch/select?q=g:com.massivedatascience+AND+a:massivedatascience-clusterer*&rows=5&wt=json"
106+
```
107+
108+
Users can then add:
109+
```scala
110+
// build.sbt
111+
libraryDependencies += "com.massivedatascience" %% "massivedatascience-clusterer" % "0.7.0"
112+
```
113+
114+
```xml
115+
<!-- Maven -->
116+
<dependency>
117+
<groupId>com.massivedatascience</groupId>
118+
<artifactId>massivedatascience-clusterer_2.13</artifactId>
119+
<version>0.7.0</version>
120+
</dependency>
121+
```
122+
123+
### PyPI
124+
125+
```bash
126+
# Check PyPI
127+
pip index versions massivedatascience-clusterer
128+
```
129+
130+
Users can then install:
131+
```bash
132+
pip install massivedatascience-clusterer
133+
```
134+
135+
---
136+
137+
## Troubleshooting
138+
139+
### Maven Central Issues
140+
141+
**"PGP signature verification failed"**
142+
- Ensure the GPG public key has been uploaded to a keyserver
143+
- Wait a few minutes for keyserver propagation
144+
- Check `GPG_PRIVATE_KEY` secret is complete (including headers)
145+
146+
**"Missing required metadata"**
147+
- Check `build.sbt` has all required POM fields:
148+
- `homepage`
149+
- `scmInfo`
150+
- `developers`
151+
- `description`
152+
- `licenses`
153+
154+
**"Unauthorized"**
155+
- Verify `SONATYPE_USERNAME` and `SONATYPE_PASSWORD` secrets
156+
- Ensure account has permission for `com.massivedatascience` namespace
157+
158+
### PyPI Issues
159+
160+
**"Invalid API token"**
161+
- Regenerate token at https://pypi.org/manage/account/token/
162+
- Ensure token starts with `pypi-`
163+
164+
**"Version already exists"**
165+
- PyPI doesn't allow re-uploading the same version
166+
- Bump version number and try again
167+
168+
---
169+
170+
## Local Testing
171+
172+
### Test Maven Publishing Locally
173+
174+
```bash
175+
# Publish to the local Ivy repository (~/.ivy2/local)
176+
sbt publishLocal
177+
178+
# Check local artifacts
179+
ls ~/.ivy2/local/com.massivedatascience/
180+
```
181+
182+
### Test PyPI Packaging Locally
183+
184+
```bash
185+
cd python
186+
187+
# Build package
188+
python -m build
189+
190+
# Check package
191+
twine check dist/*
192+
193+
# Test install
194+
pip install dist/massivedatascience_clusterer-0.7.0-py3-none-any.whl
195+
```
196+
197+
---
198+
199+
## Release Checklist
200+
201+
- [ ] All tests pass (`sbt test`)
202+
- [ ] Version updated in `version.sbt`
203+
- [ ] Version updated in `python/massivedatascience/__init__.py`
204+
- [ ] Version updated in `python/pyproject.toml`
205+
- [ ] CHANGELOG.md updated
206+
- [ ] Git tag created
207+
- [ ] GitHub release created
208+
- [ ] Maven Central publication verified
209+
- [ ] PyPI publication verified
210+
- [ ] Documentation updated

build.sbt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,36 @@ val _ = if (javaMajor != 17 && javaMajor != 8) {
6060
// Adjust javac target based on detected version (fallback to 1.8 if running on 8)
6161
// Note: Java 9+ uses simple version numbers (e.g., "17"), not "17.0"
6262
javacOptions ++= (if (javaMajor <= 8) Seq("-source", "1.8", "-target", "1.8") else Seq("-source", "17", "-target", "17"))
63+
// ============================================
64+
// Maven Central Publishing Configuration
65+
// ============================================
6366
publishMavenStyle := true
6467
Test / publishArtifact := false
6568
pomIncludeRepository := { _ => false }
69+
70+
// Sonatype settings
71+
sonatypeProfileName := "com.massivedatascience"
72+
publishTo := sonatypePublishToBundle.value
73+
sonatypeCredentialHost := "s01.oss.sonatype.org"
74+
sonatypeRepository := "https://s01.oss.sonatype.org/service/local"
75+
76+
// POM metadata required by Maven Central
77+
homepage := Some(url("https://github.com/derrickburns/generalized-kmeans-clustering"))
78+
scmInfo := Some(
79+
ScmInfo(
80+
url("https://github.com/derrickburns/generalized-kmeans-clustering"),
81+
"scm:git@github.com:derrickburns/generalized-kmeans-clustering.git"
82+
)
83+
)
84+
developers := List(
85+
Developer(
86+
id = "derrickburns",
87+
name = "Derrick Burns",
88+
email = "derrick@massivedatascience.com",
89+
url = url("https://github.com/derrickburns")
90+
)
91+
)
92+
description := "Generalized K-Means clustering with Bregman divergences for Apache Spark"
6693
// Test dependencies
6794
libraryDependencies ++= Seq(
6895
"org.scalactic" %% "scalactic" % "3.2.19",

0 commit comments

Comments
 (0)