Wed Nov 24 15:56:18 2021 UTC ()
nltk_data: add shared files for nltk_data packages
This also includes a tool to create these packages.
(wiz)
diff -r0 -r1.1 pkgsrc/meta-pkgs/nltk_data/common.mk
diff -r0 -r1.1 pkgsrc/meta-pkgs/nltk_data/howto.md
diff -r0 -r1.1 pkgsrc/meta-pkgs/nltk_data/split.py
# $NetBSD: common.mk,v 1.1 2021/11/24 15:56:18 wiz Exp $
# Shared definitions for the per-dataset nltk_data packages.
#
# The including package Makefile is expected to define DISTNAME and TYPE;
# TYPE is used below both in the download URL and as the subdirectory of
# share/nltk_data that the data is installed into.
MASTER_SITES= https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${TYPE}/
EXTRACT_SUFX?= .zip
MAINTAINER?= pkgsrc-users@NetBSD.org
HOMEPAGE?= https://www.nltk.org/data.html
COMMENT?= Natural Language Toolkit (NLTK) Data
INSTALLATION_DIRS+= share/nltk_data/${TYPE}

# UNPACK selects how the data is installed.  It is tested by the .if below
# while this file is being parsed, so a package that wants a value other
# than the default must set it before including this file.
UNPACK?= no

# Nothing to build: the target is intentionally left without a recipe.
do-build:

.if ${UNPACK} == "no"
# Install the distfile itself (${DISTNAME}${EXTRACT_SUFX}) as a data file.
do-install:
	${INSTALL_DATA} ${_DISTDIR}/${DISTNAME}${EXTRACT_SUFX} ${DESTDIR}${PREFIX}/share/nltk_data/${TYPE}
.else
# Copy ${DISTNAME} from the work directory into the data directory with pax.
USE_TOOLS+= pax
do-install:
	cd ${WRKDIR} && ${PAX} -pp -rw ${DISTNAME} ${DESTDIR}${PREFIX}/share/nltk_data/${TYPE}/
.endif
# Sources
Fetch https://www.nltk.org/nltk_data/, which is an XML file with an XSL
stylesheet. The following command should work:

    wget -O nltk_data.xml https://www.nltk.org/nltk_data/
This file contains one line per data set (as of 2021-11-24 there are 108
entries), plus some meta-package information.
# Generating the packages
Update the date in `split.py` and run it from the directory containing
`nltk_data.xml`:

    split.py
It will generate one package for each entry in the list, in
`textproc/nltk_data-${id}`.
You'll then need to run `make mdi` in each directory. If the package existed
before, make sure that the data really changed (the distinfo checksums or
sizes differ) before committing.
#!/usr/bin/env python3
"""Generate pkgsrc package skeletons from the NLTK data index.

Reads ``nltk_data.xml`` from the current directory and, for every child of
the first element below the document root, writes a ``Makefile``, a
``DESCR`` and a ``PLIST`` into ``/usr/pkgsrc/textproc/nltk_data-<id>``.
Existing files in those directories are overwritten.
"""
import os
import xml.etree.ElementTree as ET

tree = ET.parse('nltk_data.xml')
root = tree.getroot()

for child in root[0]:
    attrs = child.attrib
    pkg_id = attrs["id"]
    path = f"/usr/pkgsrc/textproc/nltk_data-{pkg_id}"
    try:
        os.mkdir(path)
    except FileExistsError:
        # Re-running the script over existing packages is expected; any
        # other failure (missing parent, permissions) is allowed to surface.
        pass
    name = attrs["name"]
    # The webpage attribute is optional; without it the Makefile gets an
    # empty line and the HOMEPAGE default from common.mk applies.
    if "webpage" in attrs:
        webpage = "HOMEPAGE=\t" + attrs["webpage"]
    else:
        webpage = ""
    # The license attribute is optional as well; use an empty value rather
    # than whatever another entry happened to set.
    pkg_license = attrs.get("license", "")
    subdir = attrs["subdir"]

    with open(path + "/Makefile", "w") as f:
        print(f"""# $NetBSD: split.py,v 1.1 2021/11/24 15:56:18 wiz Exp $
DISTNAME= {pkg_id}
PKGNAME= nltk_data-{pkg_id}-20211124
CATEGORIES= textproc
DIST_SUBDIR= ${{PKGNAME_NOREV}}
{webpage}
COMMENT= NLTK Data - {name}
#LICENSE= {pkg_license}
TYPE= {subdir}
.include "../../meta-pkgs/nltk_data/common.mk"
.include "../../mk/bsd.pkg.mk"
""", file=f, end='')

    with open(path + "/DESCR", "w") as f:
        print(f"""This package contains data for NLTK, the Natural Language Toolkit.
This package contains data from/for {name}.""", file=f)

    # The path must match where common.mk installs the distfile:
    # share/nltk_data/${{TYPE}}/${{DISTNAME}}.zip
    with open(path + "/PLIST", "w") as f:
        print(f"""@comment $NetBSD: split.py,v 1.1 2021/11/24 15:56:18 wiz Exp $
share/nltk_data/{subdir}/{pkg_id}.zip""", file=f)