Tue Aug 29 00:00:25 2023 UTC ()
biology/sra-tools: NCBI's toolkit for SRA

SRA tools is a toolkit for using data in the INSDC Sequence Read
Archives.  SRAs operated by the International Nucleotide Sequence
Database Collaboration house sequence reads and alignments generated
by "next-gen" sequencers.  SRA tools allows conversion of .sra
files, which INSDC SRAs maintain, from/to other formats that the
"next-gen" sequencers generate, including:

* csfasta/csqual (ABI SOLiD)
* fastq (and fasta for writing)
* hdf5 (PacBio, reading only)
* qseq (older Illumina)
* sam (writing only) / bam (reading only)
* sff

The toolkit uses the NCBI-VDB back-end enabling seamless access to
remote SRA data and local SRA files.


(bacon)
diff -r0 -r1.1 pkgsrc/biology/sra-tools/DESCR
diff -r0 -r1.1 pkgsrc/biology/sra-tools/Makefile
diff -r0 -r1.1 pkgsrc/biology/sra-tools/PLIST
diff -r0 -r1.1 pkgsrc/biology/sra-tools/distinfo
diff -r0 -r1.1 pkgsrc/biology/sra-tools/patches/patch-build_env.cmake
diff -r0 -r1.1 pkgsrc/biology/sra-tools/patches/patch-ncbi-vdb_libs_kproc_bsd_sysmgr.c
diff -r0 -r1.1 pkgsrc/biology/sra-tools/patches/patch-ngs_ngs-java_CMakeLists.txt
diff -r0 -r1.1 pkgsrc/biology/sra-tools/patches/patch-tools_external_driver-tool_sratools.cpp

File Added: pkgsrc/biology/sra-tools/DESCR
SRA tools is a toolkit for using data in the INSDC Sequence Read
Archives.  SRAs operated by the International Nucleotide Sequence
Database Collaboration house sequence reads and alignments generated
by "next-gen" sequencers.  SRA tools allows conversion of .sra
files, which INSDC SRAs maintain, from/to other formats that the
"next-gen" sequencers generate, including:

* csfasta/csqual (ABI SOLiD)
* fastq (and fasta for writing)
* hdf5 (PacBio, reading only)
* qseq (older Illumina)
* sam (writing only) / bam (reading only)
* sff

The toolkit uses the NCBI-VDB back-end enabling seamless access to
remote SRA data and local SRA files.

File Added: pkgsrc/biology/sra-tools/Makefile
# $NetBSD: Makefile,v 1.1 2023/08/29 00:00:24 bacon Exp $

# Distfiles come from the "outpaddling" GitHub account, pinned to a specific
# commit.  The ncbi-vdb submodule is fetched as a second distfile at its own
# pinned revision and unpacked into the ncbi-vdb subdirectory.
DISTNAME=		sra-tools-3.0.6
CATEGORIES=		biology
MASTER_SITES=		${MASTER_SITE_GITHUB:=outpaddling/}
GITHUB_TAG=		984f1493e8612cd59a269569bf8c06e25e4e604c
GITHUB_SUBMODULES+=	outpaddling ncbi-vdb 38972fc ncbi-vdb

OWNER=		bacon@NetBSD.org
HOMEPAGE=	https://github.com/ncbi/sra-tools
COMMENT=	NCBI's toolkit for handling data in INSDC Sequence Read Archives
LICENSE=	public-domain AND gnu-lgpl-v2.1

# Upstream only supports specific platforms
ONLY_FOR_PLATFORM=	*-*-aarch64 *-*-x86_64

# bash is listed for the scripts handled by REPLACE_BASH; cmake is also
# invoked directly from the pre-configure target.
USE_LANGUAGES=	c c++
USE_TOOLS+=	bash bison cmake

# The config.c and file-path.posix.cpp substitutions below are applied
# after the static patches.  Run "make clean patch" before updating
# those patches so that these substitutions do not get added to them.

# etcdir: replace the hard-coded "/etc/ncbi" directory string with
# "${PREFIX}/etc/ncbi" in the ncbi-vdb configuration code.
SUBST_CLASSES+=		etcdir
SUBST_STAGE.etcdir=	pre-configure
SUBST_SED.etcdir+=	-e 's|"/etc/ncbi"|"${PREFIX}/etc/ncbi"|g'
SUBST_FILES.etcdir+=	ncbi-vdb/libs/kfg/config.c

# binpath: replace hard-coded /usr/local/bin with ${PREFIX}/bin in the
# driver tool's POSIX file-path code.
SUBST_CLASSES+=		binpath
SUBST_STAGE.binpath=	pre-configure
SUBST_SED.binpath+=	-e 's|/usr/local/bin|${PREFIX}/bin|g'
SUBST_FILES.binpath+=	tools/external/driver-tool/file-path.posix.cpp

# submoddir: rewrite "/../ncbi-vdb" to "/ncbi-vdb" in the top-level
# CMakeLists.txt.  Presumably needed because the ncbi-vdb submodule is
# unpacked inside WRKSRC (see the ${WRKSRC}/ncbi-vdb paths used elsewhere
# in this Makefile) rather than beside it.
SUBST_CLASSES+=		submoddir
SUBST_STAGE.submoddir=	pre-configure
SUBST_SED.submoddir+=	-e 's|/../ncbi-vdb|/ncbi-vdb|g'
SUBST_FILES.submoddir+=	CMakeLists.txt

# Shell scripts located one to four directory levels below WRKSRC get their
# bash interpreter line fixed up.
REPLACE_BASH=	*/*.sh */*/*.sh */*/*/*.sh */*/*/*/*.sh

# VDB_LIBDIR points at the lib directory under the in-tree ncbi-vdb build
# directory, which the pre-configure target configures and builds.
USE_CMAKE=	yes
CMAKE_ARGS+=	-DVDB_LIBDIR:STRING=${WRKSRC}/ncbi-vdb/build/lib
CMAKE_ARGS+=	-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
# Strip -ldl from link commands on NetBSD; the trailing note names the
# upstream file the flag presumably originates from.
BUILDLINK_TRANSFORM.NetBSD+=	rm:-ldl	# ncbi-vdb/CMakeLists.posix.txt

# Final location of the installed examples; post-install moves them here.
EXAMPLESDIR=	${PREFIX}/share/examples/sra-tools

.include "../../mk/bsd.prefs.mk"

# Darwin only: after the main configure step, replace every "@rpath" in the
# CMake-generated link.txt files for ngs-sdk with ${PREFIX}/lib.  The
# pre-configure target does the same for the ncbi-vdb link.txt files.
.if ${OPSYS} == Darwin
SUBST_CLASSES+=		srarpath
SUBST_STAGE.srarpath=	post-configure
SUBST_SED.srarpath+=	-e 's|@rpath|${PREFIX}/lib|g'
SUBST_FILES.srarpath+=	cmake-pkgsrc-build/ngs/ngs-sdk/CMakeFiles/*/link.txt
.endif

# Configure and build the bundled ncbi-vdb submodule in its own build
# directory before the main sra-tools configure step runs; CMAKE_ARGS
# points VDB_LIBDIR at ${WRKSRC}/ncbi-vdb/build/lib.
#
# Steps, in order:
#   1. NetBSD only: delete the gcc/x86_64 byteswap.h that shadows the
#      bsd one.
#   2. Run cmake for ncbi-vdb.
#   3. Darwin only: rewrite "@rpath" to ${PREFIX}/lib in the generated
#      link.txt files.  The loop aborts on the first sed or mv failure
#      rather than reporting only the status of the last file processed.
#   4. Build ncbi-vdb.
pre-configure:
.if ${OPSYS} == NetBSD
	# Incorrect gcc/x86_64/byteswap.h found before correct bsd/byteswap.h
	# Adding #ifndef __NetBSD__ to gcc/x86_64/byteswap.h doesn't work
	${RM} -f ${WRKSRC}/ncbi-vdb/interfaces/cc/gcc/x86_64/byteswap.h
.endif
	cd ${WRKSRC}/ncbi-vdb/build && cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON ..
.if ${OPSYS} == Darwin
	# Unsure how to prevent cmake from emitting @rpath in the first place
	# No instances of @rpath or -install_name in any files before cmake
	for f in ${WRKSRC}/ncbi-vdb/build/libs/ncbi-vdb/CMakeFiles/*/link.txt; do \
		${SED} -e 's|@rpath|${PREFIX}/lib|g' $${f} > $${f}.tmp && ${MV} -f $${f}.tmp $${f} || exit 1; \
	done
.endif
	cd ${WRKSRC}/ncbi-vdb/build && ${MAKE}

# Tidy up the staged installation under ${DESTDIR}${PREFIX}:
#   - strip the versioned binaries (bin/*.${PKGVERSION_NOREV});
#   - move everything installed directly into share/examples down into
#     ${EXAMPLESDIR}, using share/examples-sratools as a temporary name
#     because ${EXAMPLESDIR} lives inside share/examples;
#   - move share/examples-python to ${EXAMPLESDIR}/python;
#   - remove the Java artifacts (jar, share/examples-java, share/javadoc),
#     none of which appear in the PLIST.
post-install:
	${STRIP} ${DESTDIR}${PREFIX}/bin/*.${PKGVERSION_NOREV}
	${MV} ${DESTDIR}${PREFIX}/share/examples ${DESTDIR}${PREFIX}/share/examples-sratools
	${MKDIR} ${DESTDIR}${EXAMPLESDIR}
	${MV} ${DESTDIR}${PREFIX}/share/examples-sratools/* ${DESTDIR}${EXAMPLESDIR}
	${RMDIR} ${DESTDIR}${PREFIX}/share/examples-sratools
	${MV} ${DESTDIR}${PREFIX}/share/examples-python ${DESTDIR}${EXAMPLESDIR}/python
	# FIXME: Can we prevent build from detecting java?
	${RM} -rf ${DESTDIR}${PREFIX}/jar
	${RM} -rf ${DESTDIR}${PREFIX}/share/examples-java
	${RM} -rf ${DESTDIR}${PREFIX}/share/javadoc

# On non-Linux platforms that define OPSYS_HAS_KQUEUE, pull in libepoll-shim.
# Presumably the upstream code uses the Linux epoll API - TODO confirm.
.if ${OPSYS} != Linux && defined(OPSYS_HAS_KQUEUE)
.include "../../devel/libepoll-shim/buildlink3.mk"
.endif

.include "../../mk/pthread.buildlink3.mk"
.include "../../devel/cmake/build.mk"
.include "../../textproc/libxml2/buildlink3.mk"
.include "../../devel/hdf5/buildlink3.mk"
.include "../../archivers/zstd/buildlink3.mk"
.include "../../mk/bsd.pkg.mk"

File Added: pkgsrc/biology/sra-tools/PLIST
@comment $NetBSD: PLIST,v 1.1 2023/08/29 00:00:24 bacon Exp $
bin/abi-dump
bin/abi-dump.3
bin/abi-dump.${PKGVERSION}
bin/align-info
bin/align-info.3
bin/align-info.${PKGVERSION}
bin/cache-mgr
bin/cache-mgr.3
bin/cache-mgr.${PKGVERSION}
bin/check-corrupt
bin/check-corrupt.3
bin/check-corrupt.${PKGVERSION}
bin/fasterq-dump
bin/fasterq-dump-orig.${PKGVERSION}
bin/fasterq-dump.3
bin/fasterq-dump.${PKGVERSION}
bin/fastq-dump
bin/fastq-dump-orig.${PKGVERSION}
bin/fastq-dump.3
bin/fastq-dump.${PKGVERSION}
bin/illumina-dump
bin/illumina-dump.3
bin/illumina-dump.${PKGVERSION}
bin/kdbmeta
bin/kdbmeta.3
bin/kdbmeta.${PKGVERSION}
bin/ngs-pileup
bin/ngs-pileup.3
bin/ngs-pileup.${PKGVERSION}
bin/prefetch
bin/prefetch-orig.${PKGVERSION}
bin/prefetch.3
bin/prefetch.${PKGVERSION}
bin/rcexplain
bin/rcexplain.3
bin/rcexplain.${PKGVERSION}
bin/ref-variation
bin/ref-variation.3
bin/ref-variation.${PKGVERSION}
bin/sam-dump
bin/sam-dump-orig.${PKGVERSION}
bin/sam-dump.3
bin/sam-dump.${PKGVERSION}
bin/sff-dump
bin/sff-dump.3
bin/sff-dump.${PKGVERSION}
bin/sra-info
bin/sra-info.3
bin/sra-info.${PKGVERSION}
bin/sra-pileup
bin/sra-pileup-orig.${PKGVERSION}
bin/sra-pileup.3
bin/sra-pileup.${PKGVERSION}
bin/sra-search
bin/sra-search.3
bin/sra-search.${PKGVERSION}
bin/sra-stat
bin/sra-stat.3
bin/sra-stat.${PKGVERSION}
bin/srapath
bin/srapath-orig.${PKGVERSION}
bin/srapath.3
bin/srapath.${PKGVERSION}
bin/sratools
bin/sratools.3
bin/sratools.${PKGVERSION}
bin/test-sra
bin/test-sra.3
bin/test-sra.${PKGVERSION}
bin/var-expand
bin/var-expand.3
bin/var-expand.${PKGVERSION}
bin/vdb-config
bin/vdb-config.3
bin/vdb-config.${PKGVERSION}
bin/vdb-decrypt
bin/vdb-decrypt.3
bin/vdb-decrypt.${PKGVERSION}
bin/vdb-dump
bin/vdb-dump-orig.${PKGVERSION}
bin/vdb-dump.3
bin/vdb-dump.${PKGVERSION}
bin/vdb-encrypt
bin/vdb-encrypt.3
bin/vdb-encrypt.${PKGVERSION}
bin/vdb-validate
bin/vdb-validate.3
bin/vdb-validate.${PKGVERSION}
include/ncbi-vdb/NGS.hpp
include/ngs/Alignment.hpp
include/ngs/AlignmentIterator.hpp
include/ngs/ErrorMsg.hpp
include/ngs/Fragment.hpp
include/ngs/FragmentIterator.hpp
include/ngs/Package.hpp
include/ngs/Pileup.hpp
include/ngs/PileupEvent.hpp
include/ngs/PileupEventIterator.hpp
include/ngs/PileupIterator.hpp
include/ngs/Read.hpp
include/ngs/ReadCollection.hpp
include/ngs/ReadGroup.hpp
include/ngs/ReadGroupIterator.hpp
include/ngs/ReadIterator.hpp
include/ngs/Reference.hpp
include/ngs/ReferenceIterator.hpp
include/ngs/ReferenceSequence.hpp
include/ngs/Statistics.hpp
include/ngs/StringRef.hpp
include/ngs/adapter/AlignmentItf.hpp
include/ngs/adapter/ErrorMsg.hpp
include/ngs/adapter/FragmentItf.hpp
include/ngs/adapter/PileupEventItf.hpp
include/ngs/adapter/PileupItf.hpp
include/ngs/adapter/ReadCollectionItf.hpp
include/ngs/adapter/ReadGroupItf.hpp
include/ngs/adapter/ReadItf.hpp
include/ngs/adapter/Refcount.hpp
include/ngs/adapter/ReferenceItf.hpp
include/ngs/adapter/ReferenceSequenceItf.hpp
include/ngs/adapter/StatisticsItf.hpp
include/ngs/adapter/StringItf.hpp
include/ngs/adapter/defs.h
include/ngs/inl/Alignment.hpp
include/ngs/inl/AlignmentIterator.hpp
include/ngs/inl/Fragment.hpp
include/ngs/inl/FragmentIterator.hpp
include/ngs/inl/Package.hpp
include/ngs/inl/Pileup.hpp
include/ngs/inl/PileupEvent.hpp
include/ngs/inl/PileupEventIterator.hpp
include/ngs/inl/PileupIterator.hpp
include/ngs/inl/Read.hpp
include/ngs/inl/ReadCollection.hpp
include/ngs/inl/ReadGroup.hpp
include/ngs/inl/ReadGroupIterator.hpp
include/ngs/inl/ReadIterator.hpp
include/ngs/inl/Reference.hpp
include/ngs/inl/ReferenceIterator.hpp
include/ngs/inl/ReferenceSequence.hpp
include/ngs/inl/Statistics.hpp
include/ngs/inl/StringRef.hpp
include/ngs/itf/AlignmentItf.h
include/ngs/itf/AlignmentItf.hpp
include/ngs/itf/ErrBlock.h
include/ngs/itf/ErrBlock.hpp
include/ngs/itf/ErrorMsg.hpp
include/ngs/itf/FragmentItf.h
include/ngs/itf/FragmentItf.hpp
include/ngs/itf/PackageItf.hpp
include/ngs/itf/PileupEventItf.h
include/ngs/itf/PileupEventItf.hpp
include/ngs/itf/PileupItf.h
include/ngs/itf/PileupItf.hpp
include/ngs/itf/ReadCollectionItf.h
include/ngs/itf/ReadCollectionItf.hpp
include/ngs/itf/ReadGroupItf.h
include/ngs/itf/ReadGroupItf.hpp
include/ngs/itf/ReadItf.h
include/ngs/itf/ReadItf.hpp
include/ngs/itf/Refcount.h
include/ngs/itf/Refcount.hpp
include/ngs/itf/ReferenceItf.h
include/ngs/itf/ReferenceItf.hpp
include/ngs/itf/ReferenceSequenceItf.h
include/ngs/itf/ReferenceSequenceItf.hpp
include/ngs/itf/StatisticsItf.h
include/ngs/itf/StatisticsItf.hpp
include/ngs/itf/StringItf.h
include/ngs/itf/StringItf.hpp
include/ngs/itf/VTable.h
include/ngs/itf/VTable.hpp
include/ngs/itf/defs.h
include/ngs/unix/arm64/atomic32.h
include/ngs/unix/fat86/atomic32.h
include/ngs/unix/i386/atomic32.h
include/ngs/unix/x86_64/atomic32.h
include/ngs/win/atomic32.h
include/ngs/win/stdbool.h
lib/libncbi-ngs-c++-static.a
lib/libncbi-ngs-c++.a
lib/libncbi-ngs-c++.a.3
lib/libncbi-ngs-c++.a.${PKGVERSION}
lib/libncbi-ngs-static.a
lib/libncbi-ngs.a
lib/libncbi-ngs.a.3
lib/libncbi-ngs.a.${PKGVERSION}
lib/libncbi-ngs.so
lib/libncbi-ngs.so.3
lib/libncbi-ngs.so.${PKGVERSION}
lib/libngs-c++-static.a
lib/libngs-c++.a
lib/libngs-c++.a.3
lib/libngs-c++.a.${PKGVERSION}
lib/libngs-c++.so
lib/libngs-c++.so.3
lib/libngs-c++.so.${PKGVERSION}
share/examples/sra-tools/AlignSliceTest.cpp
share/examples/sra-tools/AlignTest.cpp
share/examples/sra-tools/DumpReferenceFASTA.cpp
share/examples/sra-tools/FragTest.cpp
share/examples/sra-tools/Makefile
share/examples/sra-tools/PileupTest.cpp
share/examples/sra-tools/README.txt
share/examples/sra-tools/RefTest.cpp
share/examples/sra-tools/expected.txt
share/examples/sra-tools/python/AlignSliceTest.py
share/examples/sra-tools/python/AlignTest.py
share/examples/sra-tools/python/FragTest.py
share/examples/sra-tools/python/Makefile
share/examples/sra-tools/python/PileupTest.py
share/examples/sra-tools/python/README.txt
share/examples/sra-tools/python/RefTest.py
share/examples/sra-tools/python/expected.txt

File Added: pkgsrc/biology/sra-tools/distinfo
$NetBSD: distinfo,v 1.1 2023/08/29 00:00:24 bacon Exp $

BLAKE2s (outpaddling-ncbi-vdb-38972fc.tar.gz) = 600ac8ff4f2fbea9d2f06edc0277087e1559636226756eae415826dfbdb1fb5a
SHA512 (outpaddling-ncbi-vdb-38972fc.tar.gz) = 0cda01bf2faf59aa16c4a402bcc132d4a2fc948240f6e13b2f1d3d93c4f900a837ebf8c24ee8089e88ed4de90a8ab441a4176f8193052fd7b538e27384bcb118
Size (outpaddling-ncbi-vdb-38972fc.tar.gz) = 18978410 bytes
BLAKE2s (sra-tools-3.0.6-984f1493e8612cd59a269569bf8c06e25e4e604c.tar.gz) = 2a13546011d4ea0a000a7e1e5fdfdf1d915b6adbac8fd818967ebf4c1670dba5
SHA512 (sra-tools-3.0.6-984f1493e8612cd59a269569bf8c06e25e4e604c.tar.gz) = 9b1e671c92ba1006392160258c5b6fba85ba7dba59a23e9ed8b534ca31b46ed9e6cf2c92ea69dc2897f5a4aa9da31f5c8981cebb442236d11bcef76b854dbec9
Size (sra-tools-3.0.6-984f1493e8612cd59a269569bf8c06e25e4e604c.tar.gz) = 44961381 bytes
SHA1 (patch-build_env.cmake) = 14add56e66e5578fd0d51ce55329285e0981ac4e
SHA1 (patch-ncbi-vdb_libs_kproc_bsd_sysmgr.c) = f49eb28f8bfeb528c1d7c2e9d184502b9eba273c
SHA1 (patch-ngs_ngs-java_CMakeLists.txt) = 44b822381fd564d045406cc926f807adae9fbe59
SHA1 (patch-tools_external_driver-tool_sratools.cpp) = c490d83f03471e6e8e34b8e88534469ed45b2886

File Added: pkgsrc/biology/sra-tools/patches/patch-build_env.cmake
$NetBSD: patch-build_env.cmake,v 1.1 2023/08/29 00:00:25 bacon Exp $

# static standard libs are not standard nor easy to install on Alma Linux

--- build/env.cmake.orig	2023-08-13 18:06:02.445968665 +0000
+++ build/env.cmake
@@ -555,7 +555,7 @@ endfunction()
 function(MakeLinksExe target install_via_driver)
 
     if ( "GNU" STREQUAL "${CMAKE_C_COMPILER_ID}" )
-        target_link_options( ${target} PRIVATE -static-libgcc -static-libstdc++ )
+        target_link_options( ${target} PRIVATE )
     endif()
 
 # creates dependency loops

File Added: pkgsrc/biology/sra-tools/patches/patch-ncbi-vdb_libs_kproc_bsd_sysmgr.c
$NetBSD: patch-ncbi-vdb_libs_kproc_bsd_sysmgr.c,v 1.1 2023/08/29 00:00:25 bacon Exp $

# Add NetBSD pthread_main_np() stand-in

--- ncbi-vdb/libs/kproc/bsd/sysmgr.c.orig	2023-08-15 12:41:59.000000000 +0000
+++ ncbi-vdb/libs/kproc/bsd/sysmgr.c
@@ -30,6 +30,23 @@
 #include <pthread.h>
 #include <unistd.h>
 
+/*
+ * NetBSD lacks non-POSIX POSIX threads functions (pthread_np)
+ */
+
+#ifdef __NetBSD__
+
+// This should be initialized to pthread_self() at the start of main()
+// If the thread ID of this thread is the same, then this is the main thread
+pthread_t _ncbi_thr_main;
+
+int	pthread_main_np(void)
+
+{
+    return pthread_equal(pthread_self(), _ncbi_thr_main);
+}
+#endif
+
 /* OnMainThread
  *  returns true if running on main thread
  */

File Added: pkgsrc/biology/sra-tools/patches/patch-ngs_ngs-java_CMakeLists.txt
$NetBSD: patch-ngs_ngs-java_CMakeLists.txt,v 1.1 2023/08/29 00:00:25 bacon Exp $

# Disable java

--- ngs/ngs-java/CMakeLists.txt.orig	2023-08-13 21:46:05.343038455 +0000
+++ ngs/ngs-java/CMakeLists.txt
@@ -99,7 +99,7 @@ if ( Java_FOUND )
         gov/nih/nlm/ncbi/ngs/error/cause/UnsupportedArchCause.java
     )
 
-    find_package(JNI)
+    # find_package(JNI)
 
     if ( JNI_FOUND )
         add_jar( ngs-java

File Added: pkgsrc/biology/sra-tools/patches/patch-tools_external_driver-tool_sratools.cpp
$NetBSD: patch-tools_external_driver-tool_sratools.cpp,v 1.1 2023/08/29 00:00:25 bacon Exp $

# Add NetBSD pthread_main_np() stand-in

--- tools/external/driver-tool/sratools.cpp.orig	2023-08-15 12:51:29.845642918 +0000
+++ tools/external/driver-tool/sratools.cpp
@@ -578,11 +578,20 @@ static int main(CommandLine const &argv)
 
 } // namespace sratools
 
+#ifdef __NetBSD__
+// Defined in sysmgr.c
+extern pthread_t       _ncbi_thr_main;
+#endif
+
 // BSD is defined when compiling on Mac
 // Use the MAC case below, not this one
 #if BSD && !MAC
 int main(int argc, char *argv[], char *envp[])
 {
+#ifdef __NetBSD__
+    _ncbi_thr_main = pthread_self();
+#endif
+
     auto const invocation = CommandLine(argc, argv, envp, nullptr);
     return sratools::main(invocation);
 }