diff --git a/.gitignore b/.gitignore
index 846b360f..abc72578 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,3 +58,6 @@ docs/_build
 
 # vim
 *.swp
+
+# vscode
+.vscode
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 93afd5cd..a0413cc3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,21 +1,33 @@
-cmake_minimum_required(VERSION 3.17)
-project (so3g)
+cmake_minimum_required(VERSION 3.20)
+project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX C)
 
 include(local.cmake OPTIONAL)
 
-# cmake policies -- best to keep these in sync with spt3g!
-# Don't warn about removal of FindBoost in cmake 3.30+
-if(POLICY CMP0167)
-	cmake_policy(SET CMP0167 NEW)
+# Warn if the user invokes CMake directly
+if (NOT SKBUILD)
+    message(WARNING "\
+    This CMake file is meant to be executed using 'scikit-build-core'.
+    Running it directly will almost certainly not produce the desired
+    result. If you are a user trying to install this package, use the
+    command below, which will install all necessary build dependencies,
+    compile the package in an isolated environment, and then install it.
+    =====================================================================
+    $ pip install .
+    =====================================================================
+    If you are a software developer, and this is your own package, then
+    it is usually much more efficient to install the build dependencies
+    in your environment once and use the following command that avoids
+    a costly creation of a new virtual environment at every compilation:
+    =====================================================================
+    $ pip install nanobind scikit-build-core[pyproject]
+    $ pip install --no-build-isolation -ve .
+    =====================================================================
+    You may optionally add -Ceditable.rebuild=true to auto-rebuild when
+    the package is imported. Otherwise, you need to rerun the above
+    after editing C++ files.")
 endif()
 
-# Default to Release because we want that -O3.  This is what spt3g_software does too.
-if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
-    "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel" FORCE)
-endif(NOT CMAKE_BUILD_TYPE)
-
-# Require C++ 17 (aligned with spt3g)
+# Require C++17
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
@@ -24,146 +36,102 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 # modules.  All code should be built with PIC.
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
-# For this to be found, make sure the spt3g build directory can be
-# searched; i.e. -DCMAKE_PREFIX_PATH=/path/to/spt3g_software/build
-find_package(Spt3g REQUIRED)
+find_package(
+    Python 3.9
+    REQUIRED COMPONENTS Interpreter Development.Module
+    OPTIONAL_COMPONENTS Development.SABIModule
+)
 
-find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
+find_package(OpenMP)
 find_package(FLAC)
 find_package(GSL)
 find_package(Ceres)
+find_package(nanobind CONFIG REQUIRED)
+
+set(libso3g_SOURCES
+    src/main.cxx
+    src/hkagg.cxx
+    src/Intervals.cxx
+    src/Butterworth.cxx
+    src/Ranges.cxx
+    src/Projection.cxx
+    src/so_linterp.cxx
+    src/exceptions.cxx
+    src/array_ops.cxx
+    src/fitting_ops.cxx
+)
 
-find_package(OpenMP)
-if(OPENMP_FOUND)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-else()
-  message(WARNING "OpenMP not being linked -- this may affect performance.")
-endif()
+# Documentation for the options here:
+# https://nanobind.readthedocs.io/en/latest/api_cmake.html
+nanobind_add_module(libso3g NOMINSIZE NB_STATIC ${libso3g_SOURCES})
 
-# Determine the location of site-packages.
-execute_process(COMMAND ${Python_EXECUTABLE} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())" OUTPUT_VARIABLE PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
+# Install directive for scikit-build-core
+install(TARGETS libso3g LIBRARY DESTINATION so3g)
 
 # Numpy include directory
 execute_process(COMMAND ${Python_EXECUTABLE} -c
-  "import numpy; print(numpy.get_include())"
-  OUTPUT_VARIABLE NUMPY_INCLUDE_DIR
-  OUTPUT_STRIP_TRAILING_WHITESPACE
+    "import numpy; print(numpy.get_include())"
+    OUTPUT_VARIABLE NUMPY_INCLUDE_DIR
+    OUTPUT_STRIP_TRAILING_WHITESPACE
 )
 
 include_directories(${CMAKE_SOURCE_DIR}/include ${CMAKE_BINARY_DIR} )
 include_directories(${NUMPY_INCLUDE_DIR})
 
-#
-# Define the so3g build target.  This is a shared library.
-#
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY so3g)
-add_library(so3g SHARED
-  src/main.cxx
-  src/hkagg.cxx
-  src/Intervals.cxx
-  src/Butterworth.cxx
-  src/Ranges.cxx
-  src/Projection.cxx
-  src/G3SuperTimestream.cxx
-  src/so_linterp.cxx
-  src/exceptions.cxx
-  src/array_ops.cxx
-  src/fitting_ops.cxx
-)
-
-# We could disable the lib prefix on the output library... but let's not.
-#set_target_properties(so3g PROPERTIES PREFIX "")
+# Add external dependencies to our include / link commands
 
-# Disable boost python auto_ptr warnings
-target_compile_definitions(so3g PUBLIC BOOST_NO_AUTO_PTR)
+if(OPENMP_FOUND)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+else()
+    message(WARNING "OpenMP not being linked -- this may affect performance.")
+endif()
 
-# Link to the core spt3g library.  This brings in boost dependencies
-# as well.
-target_link_libraries(so3g PUBLIC spt3g::core)
+target_link_libraries(libso3g PUBLIC Ceres::ceres Eigen3::Eigen)
 
-# Link to GSL
-target_include_directories(so3g PRIVATE ${GSL_INCLUDE_DIR})
-target_link_libraries(so3g PUBLIC ${GSL_LIBRARIES})
-# Link Ceres
-target_link_libraries(so3g PUBLIC Ceres::ceres Eigen3::Eigen)
+target_include_directories(libso3g PRIVATE ${FLAC_INCLUDE_DIR})
 
-# FLAC- library already comes from spt3g dependencies, but
-# we need to have the headers.
-target_include_directories(so3g PRIVATE ${FLAC_INCLUDE_DIR})
+target_include_directories(libso3g PRIVATE ${GSL_INCLUDE_DIR})
+target_link_libraries(libso3g PUBLIC ${GSL_LIBRARIES})
 
 # You probably want to select openblas, so pass -DBLA_VENDOR=OpenBLAS
 find_package(BLAS REQUIRED)
 if(BLAS_FOUND)
-  message("-- BLAS found: ${BLAS_LIBRARIES}")
-  target_link_libraries(so3g PUBLIC ${BLAS_LIBRARIES})
-
-  # The BLAS library may or may not include the cblas_* bindings.
-  # This variable set is needed by check_function_exists; starting in
-  # cmake v3.18 you can say BLAS::BLAS instead of the lib path...
-  set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
-  check_function_exists(cblas_sgemm CBLAS_OK)
-  if(${CBLAS_OK})
-    message("-- cblas bindings are included in the BLAS library")
-  else()
-    message("-- cblas bindings not found in BLAS; adding cblas.")
-    target_link_libraries(so3g PUBLIC cblas)
-  endif()
-
-  # On MacOS with clang linking to the Accelerate framework, the cblas
-  # headers are not always found.  Handle this case.  Also note that the
-  # Accelerate framework has documented numerical problems- consider using
-  # a better BLAS/LAPACK implementation.
-  if(BLAS_Accelerate_LIBRARY)
-    target_include_directories(so3g PRIVATE ${BLAS_Accelerate_LIBRARY}/Versions/A/Frameworks/vecLib.framework/Headers)
-  endif()
+    message("-- BLAS found: ${BLAS_LIBRARIES}")
+    target_link_libraries(libso3g PUBLIC ${BLAS_LIBRARIES})
+
+    # The BLAS library may or may not include the cblas_* bindings.
+    # This variable set is needed by check_function_exists; starting in
+    # cmake v3.18 you can say BLAS::BLAS instead of the lib path...
+    set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
+    check_function_exists(cblas_sgemm CBLAS_OK)
+    if(${CBLAS_OK})
+        message("-- cblas bindings are included in the BLAS library")
+    else()
+        message("-- cblas bindings not found in BLAS; adding cblas.")
+        target_link_libraries(libso3g PUBLIC cblas)
+    endif()
+
+    # On MacOS with clang linking to the Accelerate framework, the cblas
+    # headers are not always found.  Handle this case.  Also note that the
+    # Accelerate framework has documented numerical problems- consider using
+    # a better BLAS/LAPACK implementation.
+    if(BLAS_Accelerate_LIBRARY)
+        target_include_directories(libso3g PRIVATE
+            ${BLAS_Accelerate_LIBRARY}/Versions/A/Frameworks/vecLib.framework/Headers
+        )
+    endif()
 endif(BLAS_FOUND)
 
 # This custom target generates _version.h, in the build tree.  That is all.
 add_custom_target(so3g-version
-  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/version_h.py
-  SO3G_VERSION_STRING ${CMAKE_CURRENT_BINARY_DIR}/_version.h
-  SOURCES version_h.py
+    COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/version_h.py
+    SO3G_VERSION_STRING ${CMAKE_CURRENT_BINARY_DIR}/_version.h
+    SOURCES version_h.py
 )
-
-add_dependencies(so3g so3g-version)
-
-# Make a list of .py files for the library.
-file(GLOB MY_PYTHONS
-  "${CMAKE_CURRENT_SOURCE_DIR}/python/*.py")
-file(GLOB MY_PYTHONS_HK
-  "${CMAKE_CURRENT_SOURCE_DIR}/python/hk/*.py")
-file(GLOB MY_PYTHONS_PROJ
-  "${CMAKE_CURRENT_SOURCE_DIR}/python/proj/*.py")
-file(GLOB MY_PYTHONS_SMURF
-  "${CMAKE_CURRENT_SOURCE_DIR}/python/smurf/*.py")
-
-# Define the install rules.
-
-if(DEFINED PYTHON_INSTALL_DEST)
-  get_filename_component(INSTALL_DEST ${PYTHON_INSTALL_DEST}/so3g
-                         ABSOLUTE BASE_DIR ${CMAKE_BINARY_DIR})
-  message("local.cmake has specified the install dir: ${INSTALL_DEST}")
-else()
-  set(INSTALL_DEST ${PYTHON_SITE_PACKAGES}/so3g)
-endif()
-
-install(TARGETS so3g
-        DESTINATION ${INSTALL_DEST})
-
-install(FILES ${MY_PYTHONS}
-        DESTINATION ${INSTALL_DEST})
-install(FILES ${MY_PYTHONS_HK}
-        DESTINATION ${INSTALL_DEST}/hk)
-install(FILES ${MY_PYTHONS_PROJ}
-        DESTINATION ${INSTALL_DEST}/proj)
-install(FILES ${MY_PYTHONS_SMURF}
-        DESTINATION ${INSTALL_DEST}/smurf)
-
-# To add a prefix, pass CMAKE_INSTALL_PREFIX.
-install(PROGRAMS scripts/so-hk-tool DESTINATION bin)
+add_dependencies(libso3g so3g-version)
 
 add_custom_target(prep-readthedocs
-  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/docs/extract_docstrings.py
-          --prep-rtd --source-branch=master
-  )
+    COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/docs/extract_docstrings.py
+            --prep-rtd --source-branch=master
+)
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index bf94ef60..00000000
--- a/Dockerfile
+++ /dev/null
@@ -1,50 +0,0 @@
-# so3g
-# A containerized so3g installation.
-
-# Build on spt3g base image
-FROM simonsobs/spt3g:0.3-289-g4bd3275
-
-# Set locale
-ENV LANG C.UTF-8
-
-# Build tools needed for pixell; blas needed for so3g.
-RUN apt update && apt install -y \
-    build-essential \
-    automake \
-    gfortran \
-    libopenblas-openmp-dev \
-    libbz2-dev \
-    python-is-python3 \
-    libgoogle-glog-dev \
-    libgflags-dev \
-    libmetis-dev \
-    libgtest-dev \
-    libabsl-dev \
-    libeigen3-dev
-
-# Set the working directory
-WORKDIR /app_lib/so3g
-
-# Fetch and install ceres-solver
-RUN git clone --depth 1 --branch 2.2.0 --recurse-submodules https://github.com/ceres-solver/ceres-solver
-
-WORKDIR /app_lib/so3g/ceres-solver
-
-RUN mkdir build \
-    && cd build \
-    && cmake .. -DBUILD_TESTING=OFF \
-    && make -j$(nproc) \
-    && make install
-
-# Set the working directory back to so3g
-WORKDIR /app_lib/so3g
-
-# Copy the current directory contents into the container
-ADD . /app_lib/so3g
-
-# Install any needed packages specified in requirements.txt
-RUN pip3 install -r requirements.txt
-RUN pip3 install -r test-requirements.txt
-
-# Build so3g
-RUN /bin/bash /app_lib/so3g/docker/so3g-setup.sh
diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index 59cfb631..00000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-version: '3.2'
-services:
-
-  so3g:
-    image: "so3g"
-    build: .
diff --git a/docker/qpoint-setup.sh b/docker/qpoint-setup.sh
deleted file mode 100644
index a23dfd8e..00000000
--- a/docker/qpoint-setup.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env bash
-
-QP_VER=1.11.2
-git clone https://github.com/arahlin/qpoint.git  --branch $QP_VER
-cd qpoint
-python3 setup.py install
diff --git a/docker/so3g-setup.sh b/docker/so3g-setup.sh
deleted file mode 100644
index f8594493..00000000
--- a/docker/so3g-setup.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env bash
-
-mkdir -p build
-cd build
-cmake \
-    -DCMAKE_VERBOSE_MAKEFILE=ON \
-    -DCMAKE_BUILD_TYPE=Release \
-    -DPython_EXECUTABLE=$(which python3) \
-    ..
-make -j 2
-make install
diff --git a/include/Butterworth.h b/include/Butterworth.h
index 9a99dc91..7ba7c980 100644
--- a/include/Butterworth.h
+++ b/include/Butterworth.h
@@ -1,10 +1,14 @@
 #include <vector>
 #include <cstdint>
+#include <array>
 
-#include <boost/python.hpp>
+#include <nanobind/nanobind.h>
 
 #include "numpy_assist.h"
 
+namespace nb = nanobind;
+
+
 class BFilterParams {
 public:
     BFilterParams(int32_t b0, int32_t b1, int b_bits, int p_bits, int shift)
@@ -25,11 +29,12 @@ class BFilterBank {
     void apply(int32_t* input, int32_t* output, int n_samp);
     void apply_to_float(float *input, float *output, float unit, int n_samp);
 
-    void apply_buffer(boost::python::object input,
-                      boost::python::object output);
+    void apply_buffer(nb::object input,
+                      nb::object output);
 
     std::vector<std::vector<std::array<std::int64_t,2>>> w;  // (n_bank,n_chan,2)
     std::vector<BFilterParams> par;
 };
 
-void butterworth_test();
+
+void register_butterworth(nb::module_ & m);
diff --git a/include/G3SuperTimestream.h b/include/G3SuperTimestream.h
deleted file mode 100644
index 654c3083..00000000
--- a/include/G3SuperTimestream.h
+++ /dev/null
@@ -1,122 +0,0 @@
-#pragma once
-
-#include "so3g_numpy.h"
-#include "numpy_assist.h"
-#include <G3Frame.h>
-#include <G3Map.h>
-
-#include <exception>
-#include <stdint.h>
-
-using namespace std;
-
-class G3SuperTimestream : public G3FrameObject {
-	// Storage for a 2d array with shape (n_dets, n_samps) along
-	// with the vector of associated timestamps (n_samps), and the
-	// vector of channel names (n_dets).  Serializes with lossless
-	// compression of int32 and int64, and controllable
-	// quantization and compression of float32 and lfoat64.
-public:
-	G3SuperTimestream();
-	~G3SuperTimestream();
-
-	G3SuperTimestream(const G3VectorString &names_, const G3VectorTime &times_);
-	G3SuperTimestream(const G3VectorString &names_, const G3VectorTime &times_,
-			  const bp::object &data);
-	G3SuperTimestream(const G3VectorString &names_, const G3VectorTime &times_,
-			  const bp::object &data_, const std::vector<double> &quanta_);
-
-	// This object contains pointers to memory that it
-	// allocated... and they're freed on destruction.  The
-	// responsible thing to do in such circumstances is to delete
-	// or modify the move and copy constructors.  But if we do
-	// that, boost python complains about something.  In G3 you
-	// avoid move/copy by wrapping all instances in a G3xxxPtr,
-	// and essentially pass the object around by reference, so
-	// that's what we'll do.  But beware, there's nothing
-	// preventing you from returning G3SuperTimestream directly
-	// from a function (except the inevitable segfault).
-
-	string Description() const;
-	string Summary() const;
-
-	bool Extract(bp::object dest, bp::object dest_indices, bp::object src_indices,
-		     int start, int stop);
-	bool Encode();
-	bool Decode();
-	void Calibrate(vector<double> rescale);
-	int Options(int enable=-1,
-		    int flac_level=-1, int bz2_workFactor=-1,
-                    int data_algo=-1, int times_algo=-1);
-
-	// Interface for C++...
-	bool SetDataFromBuffer(void* buf, int ndim, int shape[], int typenum,
-			       std::pair<int,int> sample_range);
-
-
-	template <class A> void load(A &ar, unsigned v);
-	template <class A> void save(A &ar, unsigned v) const;
-
-	struct array_desc {
-		npy_intp type_num;
-		npy_intp ndim;
-		npy_intp shape[32];
-		npy_intp nbytes;
-	};
-
-	// Container for the compressed data.
-	struct array_blob {
-		int size;
-		char *buf;
-		int count;
-		vector<int> offsets;
-	};
-
-	enum algos {
-		ALGO_NONE = 0,
-		ALGO_DO_FLAC = (1 << 0),
-		ALGO_DO_BZ = (1 << 1),
-		ALGO_DO_CONST = (1 << 2)
-	};
-
-	struct options_type {
-		int8_t times_algo;
-		int8_t data_algo;
-		int8_t flac_level;
-		int8_t bz2_workFactor;
-	} options;
-
-	G3VectorTime times;
-	G3VectorString names;
-
-	bool float_mode;
-	bool dataful;
-	vector<double> quanta;
-	struct array_desc desc;
-
-	PyArrayObject *array;
-	struct array_blob *ablob;
-};
-
-// This specialization tells cereal to use G3SuperTimestream::load/save
-// and not the base class' load/save.
-namespace cereal {
-	template <class A> struct specialize<
-		A, G3SuperTimestream, cereal::specialization::member_load_save> {};
-}
-
-G3_POINTERS(G3SuperTimestream);
-G3_SERIALIZABLE(G3SuperTimestream, 0);
-
-class g3supertimestream_exception : std::exception
-{
-	// Exception raised when internal validity checks fail.  This will
-	// also be mapped to some particular Python exception type.
-public:
-	std::string text;
-	g3supertimestream_exception(std::string text) :	text{text} {};
-
-	std::string msg_for_python() const throw() {
-		return text;
-	}
-};
diff --git a/include/Intervals.h b/include/Intervals.h
index 298661bf..f7acc490 100644
--- a/include/Intervals.h
+++ b/include/Intervals.h
@@ -1,13 +1,15 @@
 #pragma once
 
-#include <G3TimeStamp.h>
-
 #include <stdint.h>
 
+#include <nanobind/nanobind.h>
+
 #include "numpy_assist.h"
 
 using namespace std;
-namespace bp = boost::python;
+
+namespace nb = nanobind;
+
 
 // Template class for working with intervals -- pairs of objects of
 // the same (well-ordered) type, with operations defined that support
@@ -18,14 +20,14 @@ class Intervals {
 public:
     pair<T,T> domain;
     vector<pair<T,T>> segments;
-    
+
     // Construction
     Intervals();
     Intervals(pair<T,T> domain) : domain{domain} {}
     Intervals(T start, T end) : Intervals(make_pair(start,end)) {}
+    Intervals(Intervals const & other);
 
-    //static Intervals<T> from_array(const bp::numpy::ndarray &src);
-    static Intervals<T> from_array(const bp::object &src);
+    static Intervals<T> * from_array(const nb::object &src);
 
     // Basic ops
     Intervals<T>& merge(const Intervals<T> &src);
@@ -39,9 +41,9 @@ class Intervals {
 
     void cleanup();
 
-    bp::object array() const;
- 
-    Intervals<T> getitem(bp::object indices);
+    nb::object array() const;
+
+    Intervals<T> getitem(nb::object indices);
 
     // Operators.
     Intervals<T> operator~() const;
@@ -53,8 +55,8 @@ class Intervals {
     Intervals<T> operator*(const Intervals<T> &src) const;
 
     // Special conversions.
-    static bp::object from_mask(const bp::object &src, int n_bits);
-    static bp::object mask(const bp::list &ivlist, int n_bits);
+    static nb::object from_mask(const nb::object &src, int n_bits);
+    static nb::object mask(const nb::list &ivlist, int n_bits);
 
     string Description() const;
 };
@@ -63,4 +65,6 @@ class Intervals {
 typedef Intervals<double> IntervalsDouble;
 typedef Intervals<int64_t> IntervalsInt;
 typedef Intervals<int32_t> IntervalsInt32;
-typedef Intervals<G3Time> IntervalsTime;
+
+
+void register_intervals(nb::module_ & m);
diff --git a/include/Projection.h b/include/Projection.h
index d5c8c215..4a9e2055 100644
--- a/include/Projection.h
+++ b/include/Projection.h
@@ -1,8 +1,16 @@
-#include <boost/python.hpp>
+#pragma once
+
+#include <vector>
+
+#include <nanobind/nanobind.h>
+
 #include "exceptions.h"
 #include "numpy_assist.h"
 
-namespace bp = boost::python;
+using namespace std;
+
+namespace nb = nanobind;
+
 
 // For detector timestreams, a.k.a. "signal", float32 is sufficient in
 // most cases.  We don't want to template this, but let's leave our
@@ -16,9 +24,9 @@ typedef float FSIGNAL;
 template <typename DTYPE>
 class SignalSpace {
 public:
-    SignalSpace(bp::object input, std::string var_name,
+    SignalSpace(nb::object input, std::string var_name,
                 int dtype, int n_det, int n_time);
-    SignalSpace(bp::object input, std::string var_name,
+    SignalSpace(nb::object input, std::string var_name,
                 int dtype, int n_det, int n_time, int n_thirdaxis);
     ~SignalSpace() { if (data_ptr) free(data_ptr); };
 
@@ -29,10 +37,10 @@ class SignalSpace {
     vector<int> dims;
 
     vector<BufferWrapper<DTYPE>> bw;
-    bp::object ret_val;
+    nb::object ret_val;
 
 private:
-    bool _Validate(bp::object input, std::string var_name,
+    bool _Validate(nb::object input, std::string var_name,
                    int dtype);
 };
 
@@ -42,23 +50,23 @@ template<typename CoordSys, typename PixelSys, typename SpinSys>
 class ProjectionEngine {
 public:
     //ProjectionEngine(PixelSys pixelizor);
-    ProjectionEngine(bp::object pix_args);
-    bp::object coords(bp::object pbore, bp::object pofs,
-                      bp::object coord);
-    bp::object pixels(bp::object pbore, bp::object pofs, bp::object pixel);
-    vector<int> tile_hits(bp::object pbore, bp::object pofs);
-    bp::object tile_ranges(bp::object pbore, bp::object pofs, bp::object tile_lists);
-    bp::object pointing_matrix(bp::object pbore, bp::object pofs, bp::object response,
-                               bp::object pixel, bp::object proj);
-    bp::object zeros(bp::object shape);
-    bp::object pixel_ranges(bp::object pbore, bp::object pofs, bp::object map, int n_domain=-1);
-    bp::object from_map(bp::object map, bp::object pbore, bp::object pofs,
-                        bp::object response, bp::object signal);
-    bp::object to_map(bp::object map, bp::object pbore, bp::object pofs, bp::object response,
-                      bp::object signal, bp::object det_weights,
-                      bp::object thread_intervals);
-    bp::object to_weight_map(bp::object map, bp::object pbore, bp::object pofs,
-                  bp::object response, bp::object det_weights, bp::object thread_intervals);
+    ProjectionEngine(nb::object pix_args);
+    nb::object coords(nb::object pbore, nb::object pofs,
+                      nb::object coord);
+    nb::object pixels(nb::object pbore, nb::object pofs, nb::object pixel);
+    vector<int> tile_hits(nb::object pbore, nb::object pofs);
+    nb::object tile_ranges(nb::object pbore, nb::object pofs, nb::list tile_lists);
+    nb::object pointing_matrix(nb::object pbore, nb::object pofs, nb::object response,
+                               nb::object pixel, nb::object proj);
+    nb::object zeros(nb::object shape);
+    nb::object pixel_ranges(nb::object pbore, nb::object pofs, nb::object map, int n_domain=-1);
+    nb::object from_map(nb::object map, nb::object pbore, nb::object pofs,
+                        nb::object response, nb::object signal);
+    nb::object to_map(nb::object map, nb::object pbore, nb::object pofs, nb::object response,
+                      nb::object signal, nb::object det_weights,
+                      nb::object thread_intervals);
+    nb::object to_weight_map(nb::object map, nb::object pbore, nb::object pofs,
+                  nb::object response, nb::object det_weights, nb::object thread_intervals);
 
     int comp_count() const;
     int index_count() const;
@@ -66,3 +74,6 @@ class ProjectionEngine {
 private:
     PixelSys _pixelizor;
 };
+
+
+void register_projection(nb::module_ & m);
diff --git a/include/Ranges.h b/include/Ranges.h
index 1b543b77..227738de 100644
--- a/include/Ranges.h
+++ b/include/Ranges.h
@@ -2,10 +2,14 @@
 
 #include <stdint.h>
 
+#include <nanobind/nanobind.h>
+
 #include "numpy_assist.h"
 
 using namespace std;
-namespace bp = boost::python;
+
+namespace nb = nanobind;
+
 
 // Template class for working with intervals -- pairs of objects of
 // the same (well-ordered) type, with operations defined that support
@@ -23,14 +27,14 @@ class Ranges {
     Ranges(T count) : count{count}, reference(0) {}
     Ranges(T count, T reference) : count{count}, reference(reference) {}
 
-    static Ranges<T> from_array(const bp::object &src, const bp::object &count);
+    static Ranges<T> * from_array(const nb::object &src, const nb::object &count);
 
     // Basic ops
     Ranges<T>& merge(const Ranges<T> &src);
     Ranges<T>& intersect(const Ranges<T> &src);
     Ranges<T>& add_interval(const T start, const T end);
-    Ranges<T>& _add_interval_numpysafe(const bp::object start,
-                                       const bp::object end);
+    Ranges<T>& _add_interval_numpysafe(const nb::object start,
+                                       const nb::object end);
     Ranges<T>& append_interval_no_check(const T start, const T end);
     Ranges<T>& buffer(const T buff);
     Ranges<T>& close_gaps(const T gap);
@@ -38,13 +42,13 @@ class Ranges {
     Ranges<T> complement() const;
     Ranges<T> zeros_like() const;
     Ranges<T> ones_like() const;
-    
+
     void cleanup();
 
-    bp::object ranges() const;
+    nb::object ranges() const;
 
-    Ranges<T> getitem(bp::object indices);
-    bp::object shape();
+    Ranges<T> getitem(nb::object indices);
+    nb::object shape();
     void safe_set_count(T count_);
 
     // Operators.
@@ -56,10 +60,10 @@ class Ranges {
     Ranges<T> operator*(const Ranges<T> &src) const;
 
     // Special conversions.
-    static bp::object from_bitmask(const bp::object &src, int n_bits);
-    static bp::object bitmask(const bp::list &ivlist, int n_bits);
-    static bp::object from_mask(const bp::object &src);
-    bp::object mask();
+    static nb::object from_bitmask(const nb::object &src, int n_bits);
+    static nb::object bitmask(const nb::list &ivlist, int n_bits);
+    static nb::object from_mask(const nb::object &src);
+    nb::object mask();
 
     string Description() const;
 };
@@ -67,11 +71,14 @@ class Ranges {
 
 // Support for working with RangesMatrix, which is basically just a list of Ranges
 template <typename T>
-vector<Ranges<T>> extract_ranges(const bp::object & ival_list) {
-    vector<Ranges<T>> v(bp::len(ival_list));
-    for (int i=0; i<bp::len(ival_list); i++)
-        v[i] = bp::extract<Ranges<T>>(ival_list[i])();
+vector<Ranges<T>> extract_ranges(const nb::object & ival_list) {
+    vector<Ranges<T>> v(nb::len(ival_list));
+    for (int i=0; i<nb::len(ival_list); i++)
+        v[i] = nb::cast<Ranges<T>>(ival_list[i]);
     return v;
 }
 
 typedef Ranges<int32_t> RangesInt32;
+
+
+void register_ranges(nb::module_ & m);
diff --git a/include/array_ops.h b/include/array_ops.h
index 711ee33b..648a5bdf 100644
--- a/include/array_ops.h
+++ b/include/array_ops.h
@@ -1,6 +1,6 @@
 #pragma once
 
-int get_dtype(const bp::object &);
+int get_dtype(const nb::object &);
 
 template <typename T>
 T _calculate_median(const T*, const int);
diff --git a/include/exceptions.h b/include/exceptions.h
index 9e6c19af..3df5b3f1 100644
--- a/include/exceptions.h
+++ b/include/exceptions.h
@@ -1,7 +1,12 @@
 #pragma once
 
-#include <boost/python.hpp>
 #include <exception>
+#include <sstream>
+
+#include <nanobind/nanobind.h>
+
+namespace nb = nanobind;
+
 
 // so3g_exception is our internal base class, which defines the
 // interface we use for converting C++ exceptions to python.
@@ -15,8 +20,8 @@ class so3g_exception : std::exception
     so3g_exception(std::string text) :
         text{text} {}
 
-    virtual std::string msg_for_python() const throw() {
-        return text;
+    const char * what() const throw() {
+        return text.c_str();
     }
 };
 
@@ -45,11 +50,11 @@ class buffer_exception : public TypeError_exception
     std::string var_name;
     buffer_exception(std::string var_name) : var_name{var_name} {}
 
-    std::string msg_for_python() const throw() {
+    const char * what() const throw() {
         std::ostringstream s;
         s << "Argument '" << var_name << "' does not expose buffer protocol, "
             "is not contiguous, or does not export a format.";
-        return s.str();
+        return s.str().c_str();
     }
 };
 
@@ -61,11 +66,11 @@ class shape_exception : public RuntimeError_exception
     shape_exception(std::string var_name, std::string detail) :
         var_name{var_name}, detail(detail) {}
 
-    std::string msg_for_python() const throw() {
+    const char * what() const throw() {
         std::ostringstream s;
         s << "Buffer '" << var_name << "' has incompatible shape: "
           << detail << ".";
-        return s.str();
+        return s.str().c_str();
     }
 };
 
@@ -77,11 +82,11 @@ class dtype_exception : public ValueError_exception
     dtype_exception(std::string var_name, std::string type_str) :
         var_name{var_name}, type_str{type_str} {}
 
-    std::string msg_for_python() const throw() {
+    const char * what() const throw() {
         std::ostringstream s;
         s << "Expected buffer '" << var_name << "' to contain items of type "
           << type_str << ".";
-        return s.str();
+        return s.str().c_str();
     }
 };
 
@@ -92,11 +97,11 @@ class agreement_exception : public RuntimeError_exception
     agreement_exception(std::string var1, std::string var2, std::string prop) :
         var1{var1}, var2{var2}, prop{prop} {}
 
-    std::string msg_for_python() const throw() {
+    const char * what() const throw() {
         std::ostringstream s;
         s << "Expected buffers '" << var1 << "' and '" << var2 << "' to have "
           << "the same " << prop << ".";
-        return s.str();
+        return s.str().c_str();
     }
 };
 
@@ -108,10 +113,10 @@ class tiling_exception : public RuntimeError_exception
     tiling_exception(int tile_idx, std::string msg) :
         tile_idx{tile_idx}, msg{msg} {}
 
-    std::string msg_for_python() const throw() {
+    const char * what() const throw() {
         std::ostringstream s;
         s << "Tiling problem (index " << tile_idx << "): " << msg;
-        return s.str();
+        return s.str().c_str();
     }
 };
 
@@ -122,7 +127,10 @@ class general_agreement_exception : public ValueError_exception
     general_agreement_exception(std::string text) :
         text{text} {}
 
-    std::string msg_for_python() const throw() {
-        return text;
+    const char * what() const throw() {
+        return text.c_str();
     }
 };
+
+
+void register_exceptions(nb::module_ & m);
diff --git a/include/hkagg.h b/include/hkagg.h
index a3cc6850..c3f574ad 100644
--- a/include/hkagg.h
+++ b/include/hkagg.h
@@ -1,12 +1,14 @@
 #pragma once
 
-#include <G3Frame.h>
-#include <G3Map.h>
-
 #include <stdint.h>
 
+#include <nanobind/nanobind.h>
+
 using namespace std;
 
+namespace nb = nanobind;
+
+
 enum HKFrameType {
      session = 0,
      status = 1,
@@ -14,24 +16,4 @@ enum HKFrameType {
 };
 
 
-
-
-class IrregBlockDouble : public G3FrameObject {
-    // Stores a block of timestamped data.  This consists of named
-    // vectors in .data, and a vector of timestamps in .t.  The user
-    // should assure that all these vectors are the same length.
-    //
-    // In the present version, all data vectors as well as the
-    // timestamps are doubles.
-public:
-    string prefix;
-    G3MapVectorDouble data;
-    G3VectorDouble t;
-
-    string Description() const;
-    string Summary() const;
-    template <class A> void serialize(A &ar, unsigned v);
-};
-
-
-G3_SERIALIZABLE(IrregBlockDouble, 0);
+void register_hkagg(nb::module_ & m);
diff --git a/include/numpy_assist.h b/include/numpy_assist.h
index d090ecf1..93db3035 100644
--- a/include/numpy_assist.h
+++ b/include/numpy_assist.h
@@ -1,10 +1,15 @@
 #pragma once
 
-#include <boost/python.hpp>
 #include <exception>
+#include <memory>
+#include <vector>
+
+#include <nanobind/nanobind.h>
 
 #include "exceptions.h"
 
+namespace nb = nanobind;
+
 
 // check_buffer_type<T>(const Py_buffer &view)
 //
@@ -83,28 +88,30 @@ std::string type_name<double>() {
 // or np.int64 can be passed in places where we'd otherwise expect an
 // integer.
 
-inline int numpysafe_extract_int(const bp::object obj, const std::string argstr)
+inline int numpysafe_extract_int(nb::object obj, const std::string argstr)
 {
     int result = 0;
-
-    // Try extracting integer directly.
-    bp::extract<int> extractor(obj);
-    if (extractor.check())
-        return extractor();
-
-    // Maybe this is a numpy.int32, or other array scalar, for which
-    // .item() is the way to pull out the int.
-    if (PyObject_HasAttrString(obj.ptr(), "item")) {
-        bp::object result = (obj.attr("item"))();
-        bp::extract<int> extractor(result);
-        if (extractor.check())
-            return extractor();
+    // Try extracting integer directly and fall back to manual extraction.
+    if (! nb::try_cast<int>(obj, result)) {
+        if (PyObject_HasAttrString(obj.ptr(), "item")) {
+            std::string result_str = nb::str(obj.attr("item")).c_str();
+            if (result_str == "0") {
+                // legitimate zero value
+                result = 0;
+            } else {
+                result = std::atoi(result_str.c_str());
+                // atoi returns zero on error
+                if (result == 0) {
+                    std::string errstr = "Failed to get int from obj item \"" + argstr + "\"";
+                    throw ValueError_exception("errstr");
+                }
+            }
+        } else {
+            std::string errstr = "Failed to interpret argument \"" + argstr + "\" as int.";
+            throw ValueError_exception("errstr");
+        }
     }
-
-    std::string errstr = "Failed to interpret argument \"" + argstr + "\" as int.";
-    PyErr_SetString(PyExc_ValueError, errstr.c_str());
-    bp::throw_error_already_set();
-    return 0;
+    return result;
 }
 
 
@@ -153,7 +160,7 @@ class BufferWrapper {
     }
 
     // Constructor with no shape or type checking.
-    BufferWrapper(std::string name, const bp::object &src, bool optional)
+    BufferWrapper(std::string name, const nb::object &src, bool optional)
         : BufferWrapper() {
         if (optional && (src.ptr() == Py_None))
             return;
@@ -165,7 +172,7 @@ class BufferWrapper {
     }
 
     // Constructor with shape and type checking.
-    BufferWrapper(std::string name, const bp::object &src, bool optional,
+    BufferWrapper(std::string name, const nb::object &src, bool optional,
                   std::vector<int> shape)
         : BufferWrapper(name, src, optional) {
 
diff --git a/include/quaternion.h b/include/quaternion.h
new file mode 100644
index 00000000..40bedbfa
--- /dev/null
+++ b/include/quaternion.h
@@ -0,0 +1,1051 @@
+//  Copied from boost/math/quaternion.hpp, with modifications to allow standalone
+//  use without the rest of boost.
+//
+//  History:
+//  - 2025-05-15:  copied from original github source and modified.
+//
+//  (C) Copyright Hubert Holin 2001.
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for updates, documentation, and revision history.
+
+#ifndef QUATERNION_H
+#define QUATERNION_H
+
+#include <locale>                                    // for the "<<" operator
+#include <complex>
+#include <iosfwd>                                    // for the "<<" and ">>" operators
+#include <sstream>                                    // for the "<<" operator
+
+#include <type_traits>
+
+
+namespace detail {
+
+    template <class T>
+    struct is_trivial_arithmetic_type_imp
+    {
+    typedef std::integral_constant<bool,
+        noexcept(std::declval<T&>() += std::declval<T>())
+        && noexcept(std::declval<T&>() -= std::declval<T>())
+        && noexcept(std::declval<T&>() *= std::declval<T>())
+        && noexcept(std::declval<T&>() /= std::declval<T>())
+    > type;
+    };
+
+    template <class T>
+    struct is_trivial_arithmetic_type : public is_trivial_arithmetic_type_imp<T>::type {};
+}
+
+namespace constexpr_detail
+{
+    template <class T>
+    constexpr void swap(T& a, T& b)
+    {
+    T t(a);
+    a = b;
+    b = t;
+    }
+}
+
+template<typename T>
+class quaternion
+{
+    public:
+
+        typedef T value_type;
+
+
+        // constructor for H seen as R^4
+        // (also default constructor)
+
+        constexpr explicit            quaternion( T const & requested_a = T(),
+                                        T const & requested_b = T(),
+                                        T const & requested_c = T(),
+                                        T const & requested_d = T())
+        :   a(requested_a),
+            b(requested_b),
+            c(requested_c),
+            d(requested_d)
+        {
+            // nothing to do!
+        }
+
+
+        // constructor for H seen as C^2
+
+        constexpr explicit            quaternion( ::std::complex<T> const & z0,
+                                        ::std::complex<T> const & z1 = ::std::complex<T>())
+        :   a(z0.real()),
+            b(z0.imag()),
+            c(z1.real()),
+            d(z1.imag())
+        {
+            // nothing to do!
+        }
+
+
+        // UNtemplated copy constructor
+        constexpr quaternion(quaternion const & a_recopier)
+            : a(a_recopier.R_component_1()),
+            b(a_recopier.R_component_2()),
+            c(a_recopier.R_component_3()),
+            d(a_recopier.R_component_4()) {}
+
+        constexpr quaternion(quaternion && a_recopier)
+            : a(std::move(a_recopier.R_component_1())),
+            b(std::move(a_recopier.R_component_2())),
+            c(std::move(a_recopier.R_component_3())),
+            d(std::move(a_recopier.R_component_4())) {}
+
+        // templated copy constructor
+
+        template<typename X>
+        constexpr explicit            quaternion(quaternion<X> const & a_recopier)
+        :   a(static_cast<T>(a_recopier.R_component_1())),
+            b(static_cast<T>(a_recopier.R_component_2())),
+            c(static_cast<T>(a_recopier.R_component_3())),
+            d(static_cast<T>(a_recopier.R_component_4()))
+        {
+            // nothing to do!
+        }
+
+
+        // destructor
+        // (this is taken care of by the compiler itself)
+
+
+        // accessors
+        //
+        // Note:    Like complex number, quaternions do have a meaningful notion of "real part",
+        //            but unlike them there is no meaningful notion of "imaginary part".
+        //            Instead there is an "unreal part" which itself is a quaternion, and usually
+        //            nothing simpler (as opposed to the complex number case).
+        //            However, for practicality, there are accessors for the other components
+        //            (these are necessary for the templated copy constructor, for instance).
+
+        constexpr T real() const
+        {
+            return(a);
+        }
+
+        constexpr quaternion<T> unreal() const
+        {
+            return(quaternion<T>(static_cast<T>(0), b, c, d));
+        }
+
+        constexpr T R_component_1() const
+        {
+            return(a);
+        }
+
+        constexpr T R_component_2() const
+        {
+            return(b);
+        }
+
+        constexpr T R_component_3() const
+        {
+            return(c);
+        }
+
+        constexpr T R_component_4() const
+        {
+            return(d);
+        }
+
+        constexpr ::std::complex<T> C_component_1() const
+        {
+            return(::std::complex<T>(a, b));
+        }
+
+        constexpr ::std::complex<T> C_component_2() const
+        {
+            return(::std::complex<T>(c, d));
+        }
+
+        constexpr void swap(quaternion& o)
+        {
+            using constexpr_detail::swap;
+            swap(a, o.a);
+            swap(b, o.b);
+            swap(c, o.c);
+            swap(d, o.d);
+        }
+
+        // assignment operators
+
+        template<typename X>
+        constexpr quaternion<T> &        operator = (quaternion<X> const  & a_affecter)
+        {
+            a = static_cast<T>(a_affecter.R_component_1());
+            b = static_cast<T>(a_affecter.R_component_2());
+            c = static_cast<T>(a_affecter.R_component_3());
+            d = static_cast<T>(a_affecter.R_component_4());
+
+            return(*this);
+        }
+
+        constexpr quaternion<T> &        operator = (quaternion<T> const & a_affecter)
+        {
+            a = a_affecter.a;
+            b = a_affecter.b;
+            c = a_affecter.c;
+            d = a_affecter.d;
+
+            return(*this);
+        }
+
+        constexpr quaternion<T> &        operator = (quaternion<T> && a_affecter)
+        {
+            a = std::move(a_affecter.a);
+            b = std::move(a_affecter.b);
+            c = std::move(a_affecter.c);
+            d = std::move(a_affecter.d);
+
+            return(*this);
+        }
+
+        constexpr quaternion<T> &        operator = (T const & a_affecter)
+        {
+            a = a_affecter;
+
+            b = c = d = static_cast<T>(0);
+
+            return(*this);
+        }
+
+        constexpr quaternion<T> &        operator = (::std::complex<T> const & a_affecter)
+        {
+            a = a_affecter.real();
+            b = a_affecter.imag();
+
+            c = d = static_cast<T>(0);
+
+            return(*this);
+        }
+
+        // other assignment-related operators
+        //
+        // NOTE:    Quaternion multiplication is *NOT* commutative;
+        //            symbolically, "q *= rhs;" means "q = q * rhs;"
+        //            and "q /= rhs;" means "q = q * inverse_of(rhs);"
+        //
+        // Note2:   Each operator comes in 2 forms - one for the simple case where
+        //          type T throws no exceptions, and one exception-safe version
+        //          for the case where it might.
+    private:
+        constexpr quaternion<T> &        do_add(T const & rhs, const std::true_type&)
+        {
+            a += rhs;
+            return *this;
+        }
+        constexpr quaternion<T> &        do_add(T const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a + rhs, b, c, d); // exception guard
+            swap(result);
+            return *this;
+        }
+        constexpr quaternion<T> &        do_add(std::complex<T> const & rhs, const std::true_type&)
+        {
+            a += std::real(rhs);
+            b += std::imag(rhs);
+            return *this;
+        }
+        constexpr quaternion<T> &        do_add(std::complex<T> const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a + std::real(rhs), b + std::imag(rhs), c, d); // exception guard
+            swap(result);
+            return *this;
+        }
+        template <class X>
+        constexpr quaternion<T> &        do_add(quaternion<X> const & rhs, const std::true_type&)
+        {
+            a += rhs.R_component_1();
+            b += rhs.R_component_2();
+            c += rhs.R_component_3();
+            d += rhs.R_component_4();
+            return *this;
+        }
+        template <class X>
+        constexpr quaternion<T> &        do_add(quaternion<X> const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a + rhs.R_component_1(), b + rhs.R_component_2(), c + rhs.R_component_3(), d + rhs.R_component_4()); // exception guard
+            swap(result);
+            return *this;
+        }
+
+        constexpr quaternion<T> &        do_subtract(T const & rhs, const std::true_type&)
+        {
+            a -= rhs;
+            return *this;
+        }
+        constexpr quaternion<T> &        do_subtract(T const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a - rhs, b, c, d); // exception guard
+            swap(result);
+            return *this;
+        }
+        constexpr quaternion<T> &        do_subtract(std::complex<T> const & rhs, const std::true_type&)
+        {
+            a -= std::real(rhs);
+            b -= std::imag(rhs);
+            return *this;
+        }
+        constexpr quaternion<T> &        do_subtract(std::complex<T> const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a - std::real(rhs), b - std::imag(rhs), c, d); // exception guard
+            swap(result);
+            return *this;
+        }
+        template <class X>
+        constexpr quaternion<T> &        do_subtract(quaternion<X> const & rhs, const std::true_type&)
+        {
+            a -= rhs.R_component_1();
+            b -= rhs.R_component_2();
+            c -= rhs.R_component_3();
+            d -= rhs.R_component_4();
+            return *this;
+        }
+        template <class X>
+        constexpr quaternion<T> &        do_subtract(quaternion<X> const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a - rhs.R_component_1(), b - rhs.R_component_2(), c - rhs.R_component_3(), d - rhs.R_component_4()); // exception guard
+            swap(result);
+            return *this;
+        }
+
+        constexpr quaternion<T> &        do_multiply(T const & rhs, const std::true_type&)
+        {
+            a *= rhs;
+            b *= rhs;
+            c *= rhs;
+            d *= rhs;
+            return *this;
+        }
+        constexpr quaternion<T> &        do_multiply(T const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a * rhs, b * rhs, c * rhs, d * rhs); // exception guard
+            swap(result);
+            return *this;
+        }
+
+        constexpr quaternion<T> &        do_divide(T const & rhs, const std::true_type&)
+        {
+            a /= rhs;
+            b /= rhs;
+            c /= rhs;
+            d /= rhs;
+            return *this;
+        }
+        constexpr quaternion<T> &        do_divide(T const & rhs, const std::false_type&)
+        {
+            quaternion<T> result(a / rhs, b / rhs, c / rhs, d / rhs); // exception guard
+            swap(result);
+            return *this;
+        }
+    public:
+
+        constexpr quaternion<T> &        operator += (T const & rhs) { return do_add(rhs, detail::is_trivial_arithmetic_type<T>()); }
+        constexpr quaternion<T> &        operator += (::std::complex<T> const & rhs) { return do_add(rhs, detail::is_trivial_arithmetic_type<T>()); }
+        template<typename X> constexpr quaternion<T> & operator += (quaternion<X> const & rhs) { return do_add(rhs, detail::is_trivial_arithmetic_type<T>()); }
+
+        constexpr quaternion<T> &        operator -= (T const & rhs) { return do_subtract(rhs, detail::is_trivial_arithmetic_type<T>()); }
+        constexpr quaternion<T> &        operator -= (::std::complex<T> const & rhs) { return do_subtract(rhs, detail::is_trivial_arithmetic_type<T>()); }
+        template<typename X> constexpr quaternion<T> & operator -= (quaternion<X> const & rhs) { return do_subtract(rhs, detail::is_trivial_arithmetic_type<T>()); }
+
+        constexpr quaternion<T> &        operator *= (T const & rhs) { return do_multiply(rhs, detail::is_trivial_arithmetic_type<T>()); }
+
+        constexpr quaternion<T> &        operator *= (::std::complex<T> const & rhs)
+        {
+            T    ar = rhs.real();
+            T    br = rhs.imag();
+            quaternion<T> result(a*ar - b*br, a*br + b*ar, c*ar + d*br, -c*br+d*ar);
+            swap(result);
+            return(*this);
+        }
+
+        template<typename X>
+        constexpr quaternion<T> &        operator *= (quaternion<X> const & rhs)
+        {
+            T    ar = static_cast<T>(rhs.R_component_1());
+            T    br = static_cast<T>(rhs.R_component_2());
+            T    cr = static_cast<T>(rhs.R_component_3());
+            T    dr = static_cast<T>(rhs.R_component_4());
+
+            quaternion<T> result(a*ar - b*br - c*cr - d*dr, a*br + b*ar + c*dr - d*cr, a*cr - b*dr + c*ar + d*br, a*dr + b*cr - c*br + d*ar);
+            swap(result);
+            return(*this);
+        }
+
+        constexpr quaternion<T> &        operator /= (T const & rhs) { return do_divide(rhs, detail::is_trivial_arithmetic_type<T>()); }
+
+        constexpr quaternion<T> &        operator /= (::std::complex<T> const & rhs)
+        {
+            T    ar = rhs.real();
+            T    br = rhs.imag();
+            T    denominator = ar*ar+br*br;
+            quaternion<T> result((+a*ar + b*br) / denominator, (-a*br + b*ar) / denominator, (+c*ar - d*br) / denominator, (+c*br + d*ar) / denominator);
+            swap(result);
+            return(*this);
+        }
+
+        template<typename X>
+        constexpr quaternion<T> &        operator /= (quaternion<X> const & rhs)
+        {
+            T    ar = static_cast<T>(rhs.R_component_1());
+            T    br = static_cast<T>(rhs.R_component_2());
+            T    cr = static_cast<T>(rhs.R_component_3());
+            T    dr = static_cast<T>(rhs.R_component_4());
+
+            T    denominator = ar*ar+br*br+cr*cr+dr*dr;
+            quaternion<T> result((+a*ar+b*br+c*cr+d*dr)/denominator, (-a*br+b*ar-c*dr+d*cr)/denominator, (-a*cr+b*dr+c*ar-d*br)/denominator, (-a*dr-b*cr+c*br+d*ar)/denominator);
+            swap(result);
+            return(*this);
+        }
+    private:
+        T a, b, c, d;
+
+};
+
+// swap:
+template <class T>
+constexpr void swap(quaternion<T>& a, quaternion<T>& b) { a.swap(b); }
+
+// operator+
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator + (const quaternion<T1>& a, const T2& b)
+{
+   return quaternion<T1>(static_cast<T1>(a.R_component_1() + b), a.R_component_2(), a.R_component_3(), a.R_component_4());
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator + (const T1& a, const quaternion<T2>& b)
+{
+   return quaternion<T2>(static_cast<T2>(b.R_component_1() + a), b.R_component_2(), b.R_component_3(), b.R_component_4());
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator + (const quaternion<T1>& a, const std::complex<T2>& b)
+{
+   return quaternion<T1>(a.R_component_1() + std::real(b), a.R_component_2() + std::imag(b), a.R_component_3(), a.R_component_4());
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator + (const std::complex<T1>& a, const quaternion<T2>& b)
+{
+   return quaternion<T1>(b.R_component_1() + std::real(a), b.R_component_2() + std::imag(a), b.R_component_3(), b.R_component_4());
+}
+template <class T>
+inline constexpr quaternion<T> operator + (const quaternion<T>& a, const quaternion<T>& b)
+{
+   return quaternion<T>(a.R_component_1() + b.R_component_1(), a.R_component_2() + b.R_component_2(), a.R_component_3() + b.R_component_3(), a.R_component_4() + b.R_component_4());
+}
+// operator-
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator - (const quaternion<T1>& a, const T2& b)
+{
+   return quaternion<T1>(static_cast<T1>(a.R_component_1() - b), a.R_component_2(), a.R_component_3(), a.R_component_4());
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator - (const T1& a, const quaternion<T2>& b)
+{
+   return quaternion<T2>(static_cast<T2>(a - b.R_component_1()), -b.R_component_2(), -b.R_component_3(), -b.R_component_4());
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator - (const quaternion<T1>& a, const std::complex<T2>& b)
+{
+   return quaternion<T1>(a.R_component_1() - std::real(b), a.R_component_2() - std::imag(b), a.R_component_3(), a.R_component_4());
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator - (const std::complex<T1>& a, const quaternion<T2>& b)
+{
+   return quaternion<T1>(std::real(a) - b.R_component_1(), std::imag(a) - b.R_component_2(), -b.R_component_3(), -b.R_component_4());
+}
+template <class T>
+inline constexpr quaternion<T> operator - (const quaternion<T>& a, const quaternion<T>& b)
+{
+   return quaternion<T>(a.R_component_1() - b.R_component_1(), a.R_component_2() - b.R_component_2(), a.R_component_3() - b.R_component_3(), a.R_component_4() - b.R_component_4());
+}
+
+// operator*
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator * (const quaternion<T1>& a, const T2& b)
+{
+   return quaternion<T1>(static_cast<T1>(a.R_component_1() * b), a.R_component_2() * b, a.R_component_3() * b, a.R_component_4() * b);
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator * (const T1& a, const quaternion<T2>& b)
+{
+   return quaternion<T2>(static_cast<T2>(a * b.R_component_1()), a * b.R_component_2(), a * b.R_component_3(), a * b.R_component_4());
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator * (const quaternion<T1>& a, const std::complex<T2>& b)
+{
+   quaternion<T1> result(a);
+   result *= b;
+   return result;
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator * (const std::complex<T1>& a, const quaternion<T2>& b)
+{
+   quaternion<T1> result(a);
+   result *= b;
+   return result;
+}
+template <class T>
+inline constexpr quaternion<T> operator * (const quaternion<T>& a, const quaternion<T>& b)
+{
+   quaternion<T> result(a);
+   result *= b;
+   return result;
+}
+
+// operator/
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator / (const quaternion<T1>& a, const T2& b)
+{
+   return quaternion<T1>(a.R_component_1() / b, a.R_component_2() / b, a.R_component_3() / b, a.R_component_4() / b);
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator / (const T1& a, const quaternion<T2>& b)
+{
+   quaternion<T2> result(a);
+   result /= b;
+   return result;
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T2, T1>::value, quaternion<T1> >::type
+operator / (const quaternion<T1>& a, const std::complex<T2>& b)
+{
+   quaternion<T1> result(a);
+   result /= b;
+   return result;
+}
+template <class T1, class T2>
+inline constexpr typename std::enable_if<std::is_convertible<T1, T2>::value, quaternion<T2> >::type
+operator / (const std::complex<T1>& a, const quaternion<T2>& b)
+{
+   quaternion<T2> result(a);
+   result /= b;
+   return result;
+}
+template <class T>
+inline constexpr quaternion<T> operator / (const quaternion<T>& a, const quaternion<T>& b)
+{
+   quaternion<T> result(a);
+   result /= b;
+   return result;
+}
+
+
+template<typename T>
+inline constexpr const quaternion<T>&             operator + (quaternion<T> const & q)
+{
+    return q;
+}
+
+
+template<typename T>
+inline constexpr quaternion<T>                    operator - (quaternion<T> const & q)
+{
+    return(quaternion<T>(-q.R_component_1(),-q.R_component_2(),-q.R_component_3(),-q.R_component_4()));
+}
+
+
+template<typename R, typename T>
+inline constexpr typename std::enable_if<std::is_convertible<R, T>::value, bool>::type operator == (R const & lhs, quaternion<T> const & rhs)
+{
+    return    (
+                (rhs.R_component_1() == lhs)&&
+                (rhs.R_component_2() == static_cast<T>(0))&&
+                (rhs.R_component_3() == static_cast<T>(0))&&
+                (rhs.R_component_4() == static_cast<T>(0))
+            );
+}
+
+
+template<typename T, typename R>
+inline constexpr typename std::enable_if<std::is_convertible<R, T>::value, bool>::type operator == (quaternion<T> const & lhs, R const & rhs)
+{
+    return rhs == lhs;
+}
+
+
+template<typename T>
+inline constexpr bool                                operator == (::std::complex<T> const & lhs, quaternion<T> const & rhs)
+{
+    return    (
+                (rhs.R_component_1() == lhs.real())&&
+                (rhs.R_component_2() == lhs.imag())&&
+                (rhs.R_component_3() == static_cast<T>(0))&&
+                (rhs.R_component_4() == static_cast<T>(0))
+            );
+}
+
+
+template<typename T>
+inline constexpr bool                                operator == (quaternion<T> const & lhs, ::std::complex<T> const & rhs)
+{
+    return rhs == lhs;
+}
+
+
+template<typename T>
+inline constexpr bool                                operator == (quaternion<T> const & lhs, quaternion<T> const & rhs)
+{
+    return    (
+                (rhs.R_component_1() == lhs.R_component_1())&&
+                (rhs.R_component_2() == lhs.R_component_2())&&
+                (rhs.R_component_3() == lhs.R_component_3())&&
+                (rhs.R_component_4() == lhs.R_component_4())
+            );
+}
+
+template<typename R, typename T> inline constexpr bool operator != (R const & lhs, quaternion<T> const & rhs) { return !(lhs == rhs); }
+template<typename T, typename R> inline constexpr bool operator != (quaternion<T> const & lhs, R const & rhs) { return !(lhs == rhs); }
+template<typename T> inline constexpr bool operator != (::std::complex<T> const & lhs, quaternion<T> const & rhs) { return !(lhs == rhs); }
+template<typename T> inline constexpr bool operator != (quaternion<T> const & lhs, ::std::complex<T> const & rhs) { return !(lhs == rhs); }
+template<typename T> inline constexpr bool operator != (quaternion<T> const & lhs, quaternion<T> const & rhs) { return !(lhs == rhs); }
+
+
+// Note:    we allow the following formats, with a, b, c, and d reals
+//            a
+//            (a), (a,b), (a,b,c), (a,b,c,d)
+//            (a,(c)), (a,(c,d)), ((a)), ((a),c), ((a),(c)), ((a),(c,d)), ((a,b)), ((a,b),c), ((a,b),(c)), ((a,b),(c,d))
+template<typename T, typename charT, class traits>
+::std::basic_istream<charT,traits> &    operator >> (    ::std::basic_istream<charT,traits> & is,
+                                                        quaternion<T> & q)
+{
+    const ::std::ctype<charT> & ct = ::std::use_facet< ::std::ctype<charT> >(is.getloc());
+
+    T    a = T();
+    T    b = T();
+    T    c = T();
+    T    d = T();
+
+    ::std::complex<T>    u = ::std::complex<T>();
+    ::std::complex<T>    v = ::std::complex<T>();
+
+    charT    ch = charT();
+    char    cc;
+
+    is >> ch;                                        // get the first lexeme
+
+    if    (!is.good())    goto finish;
+
+    cc = ct.narrow(ch, char());
+
+    if    (cc == '(')                            // read "(", possible: (a), (a,b), (a,b,c), (a,b,c,d), (a,(c)), (a,(c,d)), ((a)), ((a),c), ((a),(c)), ((a),(c,d)), ((a,b)), ((a,b),c), ((a,b),(c)), ((a,b,),(c,d,))
+    {
+        is >> ch;                                    // get the second lexeme
+
+        if    (!is.good())    goto finish;
+
+        cc = ct.narrow(ch, char());
+
+        if    (cc == '(')                        // read "((", possible: ((a)), ((a),c), ((a),(c)), ((a),(c,d)), ((a,b)), ((a,b),c), ((a,b),(c)), ((a,b,),(c,d,))
+        {
+            is.putback(ch);
+
+            is >> u;                                // we extract the first and second components
+            a = u.real();
+            b = u.imag();
+
+            if    (!is.good())    goto finish;
+
+            is >> ch;                                // get the next lexeme
+
+            if    (!is.good())    goto finish;
+
+            cc = ct.narrow(ch, char());
+
+            if        (cc == ')')                    // format: ((a)) or ((a,b))
+            {
+                q = quaternion<T>(a,b);
+            }
+            else if    (cc == ',')                // read "((a)," or "((a,b),", possible: ((a),c), ((a),(c)), ((a),(c,d)), ((a,b),c), ((a,b),(c)), ((a,b,),(c,d,))
+            {
+                is >> v;                            // we extract the third and fourth components
+                c = v.real();
+                d = v.imag();
+
+                if    (!is.good())    goto finish;
+
+                is >> ch;                                // get the last lexeme
+
+                if    (!is.good())    goto finish;
+
+                cc = ct.narrow(ch, char());
+
+                if    (cc == ')')                    // format: ((a),c), ((a),(c)), ((a),(c,d)), ((a,b),c), ((a,b),(c)) or ((a,b,),(c,d,))
+                {
+                    q = quaternion<T>(a,b,c,d);
+                }
+                else                            // error
+                {
+                    is.setstate(::std::ios_base::failbit);
+                }
+            }
+            else                                // error
+            {
+                is.setstate(::std::ios_base::failbit);
+            }
+        }
+        else                                // read "(a", possible: (a), (a,b), (a,b,c), (a,b,c,d), (a,(c)), (a,(c,d))
+        {
+            is.putback(ch);
+
+            is >> a;                                // we extract the first component
+
+            if    (!is.good())    goto finish;
+
+            is >> ch;                                // get the third lexeme
+
+            if    (!is.good())    goto finish;
+
+            cc = ct.narrow(ch, char());
+
+            if        (cc == ')')                    // format: (a)
+            {
+                q = quaternion<T>(a);
+            }
+            else if    (cc == ',')                // read "(a,", possible: (a,b), (a,b,c), (a,b,c,d), (a,(c)), (a,(c,d))
+            {
+                is >> ch;                            // get the fourth lexeme
+
+                if    (!is.good())    goto finish;
+
+                cc = ct.narrow(ch, char());
+
+                if    (cc == '(')                // read "(a,(", possible: (a,(c)), (a,(c,d))
+                {
+                    is.putback(ch);
+
+                    is >> v;                        // we extract the third and fourth component
+
+                    c = v.real();
+                    d = v.imag();
+
+                    if    (!is.good())    goto finish;
+
+                    is >> ch;                        // get the ninth lexeme
+
+                    if    (!is.good())    goto finish;
+
+                    cc = ct.narrow(ch, char());
+
+                    if    (cc == ')')                // format: (a,(c)) or (a,(c,d))
+                    {
+                        q = quaternion<T>(a,b,c,d);
+                    }
+                    else                        // error
+                    {
+                        is.setstate(::std::ios_base::failbit);
+                    }
+                }
+                else                        // read "(a,b", possible: (a,b), (a,b,c), (a,b,c,d)
+                {
+                    is.putback(ch);
+
+                    is >> b;                        // we extract the second component
+
+                    if    (!is.good())    goto finish;
+
+                    is >> ch;                        // get the fifth lexeme
+
+                    if    (!is.good())    goto finish;
+
+                    cc = ct.narrow(ch, char());
+
+                    if    (cc == ')')                // format: (a,b)
+                    {
+                        q = quaternion<T>(a,b);
+                    }
+                    else if    (cc == ',')        // read "(a,b,", possible: (a,b,c), (a,b,c,d)
+                    {
+                        is >> c;                    // we extract the third component
+
+                        if    (!is.good())    goto finish;
+
+                        is >> ch;                    // get the seventh lexeme
+
+                        if    (!is.good())    goto finish;
+
+                        cc = ct.narrow(ch, char());
+
+                        if        (cc == ')')        // format: (a,b,c)
+                        {
+                            q = quaternion<T>(a,b,c);
+                        }
+                        else if    (cc == ',')    // read "(a,b,c,", possible: (a,b,c,d)
+                        {
+                            is >> d;                // we extract the fourth component
+
+                            if    (!is.good())    goto finish;
+
+                            is >> ch;                // get the ninth lexeme
+
+                            if    (!is.good())    goto finish;
+
+                            cc = ct.narrow(ch, char());
+
+                            if    (cc == ')')        // format: (a,b,c,d)
+                            {
+                                q = quaternion<T>(a,b,c,d);
+                            }
+                            else                // error
+                            {
+                                is.setstate(::std::ios_base::failbit);
+                            }
+                        }
+                        else                    // error
+                        {
+                            is.setstate(::std::ios_base::failbit);
+                        }
+                    }
+                    else                        // error
+                    {
+                        is.setstate(::std::ios_base::failbit);
+                    }
+                }
+            }
+            else                                // error
+            {
+                is.setstate(::std::ios_base::failbit);
+            }
+        }
+    }
+    else                                        // format:    a
+    {
+        is.putback(ch);
+
+        is >> a;                                    // we extract the first component
+
+        if    (!is.good())    goto finish;
+
+        q = quaternion<T>(a);
+    }
+
+    finish:
+    return(is);
+}
+
+
+template<typename T, typename charT, class traits>
+::std::basic_ostream<charT,traits> &    operator << (    ::std::basic_ostream<charT,traits> & os,
+                                                        quaternion<T> const & q)
+{
+    ::std::basic_ostringstream<charT,traits>    s;
+
+    s.flags(os.flags());
+    s.imbue(os.getloc());
+    s.precision(os.precision());
+
+    s << '('    << q.R_component_1() << ','
+                << q.R_component_2() << ','
+                << q.R_component_3() << ','
+                << q.R_component_4() << ')';
+
+    return os << s.str();
+}
+
+
+// values
+
+template<typename T>
+inline constexpr T real(quaternion<T> const & q)
+{
+    return(q.real());
+}
+
+
+template<typename T>
+inline constexpr quaternion<T> unreal(quaternion<T> const & q)
+{
+    return(q.unreal());
+}
+
+template<typename T>
+inline T sup(quaternion<T> const & q)
+{
+    using    ::std::abs;
+    return (std::max)((std::max)(abs(q.R_component_1()), abs(q.R_component_2())), (std::max)(abs(q.R_component_3()), abs(q.R_component_4())));
+}
+
+
+template<typename T>
+inline T l1(quaternion<T> const & q)
+{
+    using    ::std::abs;
+    return abs(q.R_component_1()) + abs(q.R_component_2()) + abs(q.R_component_3()) + abs(q.R_component_4());
+}
+
+
+template<typename T>
+inline T abs(quaternion<T> const & q)
+{
+    using    ::std::abs;
+    using    ::std::sqrt;
+
+    T            maxim = sup(q);    // overflow protection
+
+    if    (maxim == static_cast<T>(0))
+    {
+        return(maxim);
+    }
+    else
+    {
+        T    mixam = static_cast<T>(1)/maxim;    // prefer multiplications over divisions
+
+        T a = q.R_component_1() * mixam;
+        T b = q.R_component_2() * mixam;
+        T c = q.R_component_3() * mixam;
+        T d = q.R_component_4() * mixam;
+
+        a *= a;
+        b *= b;
+        c *= c;
+        d *= d;
+
+        return(maxim * sqrt(a + b + c + d));
+    }
+
+    //return(sqrt(norm(q)));
+}
+
+
+// Note:    This is the Cayley norm, not the Euclidean norm...
+
+template<typename T>
+inline constexpr T norm(quaternion<T>const  & q)
+{
+    return(real(q*conj(q)));
+}
+
+
+template<typename T>
+inline constexpr quaternion<T> conj(quaternion<T> const & q)
+{
+    return(quaternion<T>(   +q.R_component_1(),
+                            -q.R_component_2(),
+                            -q.R_component_3(),
+                            -q.R_component_4()));
+}
+
+
+template<typename T>
+inline quaternion<T>                    spherical(  T const & rho,
+                                                    T const & theta,
+                                                    T const & phi1,
+                                                    T const & phi2)
+{
+    using ::std::cos;
+    using ::std::sin;
+
+    //T    a = cos(theta)*cos(phi1)*cos(phi2);
+    //T    b = sin(theta)*cos(phi1)*cos(phi2);
+    //T    c = sin(phi1)*cos(phi2);
+    //T    d = sin(phi2);
+
+    T    courrant = static_cast<T>(1);
+
+    T    d = sin(phi2);
+
+    courrant *= cos(phi2);
+
+    T    c = sin(phi1)*courrant;
+
+    courrant *= cos(phi1);
+
+    T    b = sin(theta)*courrant;
+    T    a = cos(theta)*courrant;
+
+    return(rho*quaternion<T>(a,b,c,d));
+}
+
+
+template<typename T>
+inline quaternion<T>                    semipolar(  T const & rho,
+                                                    T const & alpha,
+                                                    T const & theta1,
+                                                    T const & theta2)
+{
+    using ::std::cos;
+    using ::std::sin;
+
+    T    a = cos(alpha)*cos(theta1);
+    T    b = cos(alpha)*sin(theta1);
+    T    c = sin(alpha)*cos(theta2);
+    T    d = sin(alpha)*sin(theta2);
+
+    return(rho*quaternion<T>(a,b,c,d));
+}
+
+
+template<typename T>
+inline quaternion<T>                    multipolar( T const & rho1,
+                                                    T const & theta1,
+                                                    T const & rho2,
+                                                    T const & theta2)
+{
+    using ::std::cos;
+    using ::std::sin;
+
+    T    a = rho1*cos(theta1);
+    T    b = rho1*sin(theta1);
+    T    c = rho2*cos(theta2);
+    T    d = rho2*sin(theta2);
+
+    return(quaternion<T>(a,b,c,d));
+}
+
+
+template<typename T>
+inline quaternion<T>                    cylindrospherical(  T const & t,
+                                                            T const & radius,
+                                                            T const & longitude,
+                                                            T const & latitude)
+{
+    using ::std::cos;
+    using ::std::sin;
+
+
+
+    T    b = radius*cos(longitude)*cos(latitude);
+    T    c = radius*sin(longitude)*cos(latitude);
+    T    d = radius*sin(latitude);
+
+    return(quaternion<T>(t,b,c,d));
+}
+
+
+template<typename T>
+inline quaternion<T>                    cylindrical(T const & r,
+                                                    T const & angle,
+                                                    T const & h1,
+                                                    T const & h2)
+{
+    using ::std::cos;
+    using ::std::sin;
+
+    T    a = r*cos(angle);
+    T    b = r*sin(angle);
+
+    return(quaternion<T>(a,b,h1,h2));
+}
+
+#endif /* QUATERNION_H */
diff --git a/include/so_linterp.h b/include/so_linterp.h
index 99cdc717..e041b9ce 100644
--- a/include/so_linterp.h
+++ b/include/so_linterp.h
@@ -5,6 +5,11 @@
 #include <vector>
 #include <iostream>
 
+#include <nanobind/nanobind.h>
+
+namespace nb = nanobind;
+
+
 class LookupTable
 {
 public:
@@ -69,3 +74,6 @@ class atan2Table : public LookupTable
         return get_raw(y/x);
     }
 };
+
+
+void register_so_linterp(nb::module_ & m);
diff --git a/modules/README.rst b/modules/README.rst
deleted file mode 100644
index d6307b73..00000000
--- a/modules/README.rst
+++ /dev/null
@@ -1,51 +0,0 @@
-Environment Modules
-===================
-
-The spt3g_ installation instructions describe how to build the software, which
-creates a script that initializes a set of environment variables allowing you
-to import spt3g.
-
-To avoid needing to run this script every time you want to use spt3g we
-recommend using environment modules to setup the environment and loading the
-modules in your ``.bashrc`` file.
-
-Installation
-------------
-
-In addition to needing the ``spt3g`` dependencies you also need to install the
-``environment-modules`` package, as well as the ``tcl`` package::
-
-    $ sudo apt-get update
-    $ sudo apt-get install -y tcl environment-modules
-
-Setup
------
-
-To setup, copy this modules directory somewhere you would like to install it,
-for the example we will use your home directory. You should then edit the first
-uncommented line in ``modules/spt3g_shared`` to set the ``g3root`` to the
-location you have cloned the ``spt3g_software`` to. For a user called "vagrant"
-with ``spt3g_softare`` in their home directory it will look like this::
-
-    set g3root /home/vagrant/spt3g_software
-
-Once you have updated this, add the following lines to your ``.bashrc`` file::
-
-    # load spt3g using environment modules
-    module use --append /home/vagrant/modules
-    module load spt3g_shared
-
-Replace ``/vagrant/modules`` with the location you have copied this directory
-to. You can then test by sourcing your ``.bashrc`` and trying to load spt3g::
-  
-    $ source ~/.bashrc
-    $ python3 -c "import spt3g.core"
-
-Shared Installation
--------------------
-
-This can be used to make a shared installation of ``spt3g``, just have other
-users add the same lines to their ``.bashrc`` file and make sure ``spt3g`` is
-installed somewhere they have permissions to read it.
-
-.. _spt3g: https://github.com/CMB-S4/spt3g_software
diff --git a/modules/spt3g_shared b/modules/spt3g_shared
deleted file mode 100644
index 50b7126a..00000000
--- a/modules/spt3g_shared
+++ /dev/null
@@ -1,12 +0,0 @@
-#%Module 1.0
-#
-# spt3g shared
-#
-set g3root /home/vagrant/spt3g_software
-
-setenv          SPT3G_SOFTWARE_PATH $g3root
-setenv          SPT3G_SOFTWARE_BUILD_PATH $g3root/build
-
-prepend-path            PATH            $g3root/build/bin
-prepend-path            LD_LIBRARY_PATH $g3root/build/spt3g
-prepend-path            PYTHONPATH      $g3root/build
diff --git a/pyproject.toml b/pyproject.toml
index c9f1c65c..69ee05d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,17 +1,15 @@
 [build-system]
 requires = [
-    "cmake>=3.17",
-    "setuptools",
-    "wheel",
+    "scikit-build-core >=0.10",
+    "nanobind >=1.3.2",
     "numpy",
-    "scipy",
     # Astropy depends on numpy 1.x with python-3.9.  Place
     # a build-time dependency here so that we build with a
     # compatible version of numpy.  Remove this after dropping
     # python-3.9 support.
     "astropy",
 ]
-build-backend = "setuptools.build_meta"
+build-backend = "scikit_build_core.build"
 
 [project]
 name = "so3g"
@@ -23,6 +21,7 @@ requires-python = ">=3.9"
 dependencies = [
     "numpy",
     "scipy",
+    "spt3g",
     "astropy",
     "matplotlib",
     "ephem",
@@ -43,9 +42,29 @@ classifiers = [
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
 ]
 
+[project.scripts]
+so-hk-tool = "so3g.hk.cli:main"
+
+[tool.scikit-build]
+build.verbose = true
+# cmake.build-type = "Release"
+cmake.build-type = "Debug"
+build-dir = "build/{wheel_tag}"
+wheel.py-api = "cp312"
+
+[tool.scikit-build.wheel.packages]
+"so3g" = "python/so3g"
+"so3g/hk" = "python/so3g/hk"
+"so3g/proj" = "python/so3g/proj"
+"so3g/smurf" = "python/so3g/smurf"
+
+#[tool.scikit-build.metadata.version]
+
 [tool.pytest.ini_options]
 addopts = [
     "--import-mode=importlib",
 ]
+
diff --git a/python/__init__.py b/python/__init__.py
deleted file mode 100644
index 20baae72..00000000
--- a/python/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import os
-import numpy as np
-
-
-if os.getenv('DOCS_BUILD') == '1':
-    from ._libso3g_docstring_shells import *
-else:
-    # For our compiled libraries to load, the spt3g.core library must already be loaded.
-    from . import spt3g
-    from spt3g import core as spt3g_core
-
-    # Our library is called libso3g.{suffix}, but will load into module
-    # namespace so3g.
-    from .load_pybindings import load_pybindings
-    load_pybindings([__path__[0] + '/libso3g'], name='so3g')
-
-# Version is computed by versioneer.
-__version__ = version()
-
-# Other python modules.
-from . import hk
-from . import proj
-
-from .g3reader_shim import G3IndexedReader
diff --git a/python/load_pybindings.py b/python/load_pybindings.py
deleted file mode 100644
index 9a8ea4d4..00000000
--- a/python/load_pybindings.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Based on spt3g.core.load_bindings.
-#
-import platform, sys, os
-
-# Starting in spt3g 0.3-240-ga9d32d5, dload may be used.
-from spt3g import dload
-
-
-if platform.system().startswith('freebsd') or platform.system().startswith('FreeBSD'):
-    # C++ modules are extremely fragile when loaded with RTLD_LOCAL,
-    # which is what Python uses on FreeBSD by default, and maybe other
-    # systems. Convince it to use RTLD_GLOBAL.
-    
-    # See thread by Abrahams et al:
-    # http://mail.python.org/pipermail/python-dev/2002-May/024074.html
-    sys.setdlopenflags(0x102)
-
-def load_pybindings(paths, name=None, lib_suffix=None):
-    """
-    Load all non-private items from the libraries in the list "paths".
-    Provide the full path to each library, but without extension.  The
-    .so or .dylib will be appended depending on the system
-    architecture.  The namespace into which the items are imported
-    will be determined from the first path, unless name= is explicitly
-    provided.
-    """
-    if lib_suffix is None:
-        if platform.system().startswith('Darwin'):
-            # OSX compatibility requires .dylib suffix
-            lib_suffix = ".dylib"
-        else:
-            lib_suffix = ".so"
-    for path in paths:
-        if name is None:
-            name = os.path.split(path)[1]
-        # Save copy of current module def
-        mod = sys.modules[name]
-        m = dload.load_dynamic(name, name, path + lib_suffix)
-        sys.modules[name] = mod # Don't override Python mod with C++
-
-        for (k,v) in m.__dict__.items():
-            if not k.startswith("_"):
-                mod.__dict__[k] = v
diff --git a/python/so3g/__init__.py b/python/so3g/__init__.py
new file mode 100644
index 00000000..1e7b0926
--- /dev/null
+++ b/python/so3g/__init__.py
@@ -0,0 +1,15 @@
+import os
+import numpy as np
+
+# Many downstream packages expect all of the libso3g symbols to be exported
+# into the top namespace.
+from .libso3g import *
+
+# Version is defined in the compiled extension.
+__version__ = version()
+
+# Other python modules.
+from . import hk
+from . import proj
+
+from .g3reader_shim import G3IndexedReader
diff --git a/python/g3reader_shim.py b/python/so3g/g3reader_shim.py
similarity index 87%
rename from python/g3reader_shim.py
rename to python/so3g/g3reader_shim.py
index 0507abc4..be8e659c 100644
--- a/python/g3reader_shim.py
+++ b/python/so3g/g3reader_shim.py
@@ -1,8 +1,9 @@
 import warnings
 
-from . import spt3g
+from spt3g import core
 
-class G3IndexedReader(spt3g.core.G3Reader):
+
+class G3IndexedReader(core.G3Reader):
     def __init__(self, *a, **kw):
         warnings.warn("so3g.G3IndexedReader is deprecated and will be removed "
                       "in a future version; use spt3g.G3Reader (.seek/.tell).",
diff --git a/python/hk/__init__.py b/python/so3g/hk/__init__.py
similarity index 100%
rename from python/hk/__init__.py
rename to python/so3g/hk/__init__.py
diff --git a/python/hk/cli.py b/python/so3g/hk/cli.py
similarity index 99%
rename from python/hk/cli.py
rename to python/so3g/hk/cli.py
index 2cba3291..89efb4b3 100644
--- a/python/hk/cli.py
+++ b/python/so3g/hk/cli.py
@@ -1,4 +1,4 @@
-import so3g
+
 from spt3g import core
 import numpy as np
 import os
diff --git a/python/hk/getdata.py b/python/so3g/hk/getdata.py
similarity index 98%
rename from python/hk/getdata.py
rename to python/so3g/hk/getdata.py
index a7de3c34..8f3deefa 100644
--- a/python/hk/getdata.py
+++ b/python/so3g/hk/getdata.py
@@ -18,10 +18,11 @@
 import numpy as np
 import datetime as dt
 
-
-import so3g
 from spt3g import core
 
+from ..libso3g import IntervalsDouble, HKFrameType
+from .translator import HKTranslator
+
 
 hk_logger = logging.getLogger(__name__)
 hk_logger.setLevel(logging.INFO)
@@ -92,7 +93,7 @@ def __init__(self, field_groups=None):
             self.field_groups = list(field_groups)
         # A translator is used to update frames, on the fly, to the
         # modern schema assumed here.
-        self.translator = so3g.hk.HKTranslator()
+        self.translator = HKTranslator()
 
     def _get_groups(self, fields=None, start=None, end=None,
                     short_match=False):
@@ -136,7 +137,7 @@ def _get_groups(self, fields=None, start=None, end=None,
             in any group.
 
         """
-        span = so3g.IntervalsDouble()
+        span = IntervalsDouble()
         if start is None:
             start = span.domain[0]
         if end is None:
@@ -484,7 +485,7 @@ def __init__(self, pre_proc_dir=None, pre_proc_mode=None):
         self.field_groups = []
         self.frame_info = []
         self.counter = -1
-        self.translator = so3g.hk.HKTranslator()
+        self.translator = HKTranslator()
         self.pre_proc_dir = pre_proc_dir
         self.pre_proc_mode = pre_proc_mode
 
@@ -516,7 +517,7 @@ def Process(self, f, index_info=None):
         vers = f.get('hkagg_version', 0)
         assert(vers == 2)
 
-        if f['hkagg_type'] == so3g.HKFrameType.session:
+        if f['hkagg_type'] == HKFrameType.session:
             session_id = f['session_id']
             if self.session_id is not None:
                 if self.session_id != session_id:
@@ -526,7 +527,7 @@ def Process(self, f, index_info=None):
                               (session_id, f['start_time']), unit='HKScanner')
                 self.session_id = session_id
 
-        elif f['hkagg_type'] == so3g.HKFrameType.status:
+        elif f['hkagg_type'] == HKFrameType.status:
             # If a provider has disappeared, flush its information into a
             # FieldGroup.
             prov_cands = [_HKProvider.from_g3(p) for p in f['providers']]
@@ -539,7 +540,7 @@ def Process(self, f, index_info=None):
             for prov_id in to_flush:
                 self.flush([prov_id])
 
-        elif f['hkagg_type'] == so3g.HKFrameType.data:
+        elif f['hkagg_type'] == HKFrameType.data:
             # Data frame -- merge info for this provider.
             prov = self.providers[f['prov_id']]
             representatives = prov.blocks.keys()
@@ -560,7 +561,7 @@ def Process(self, f, index_info=None):
                       'count': len(b.times)}
                 ii.update(index_info)
                 prov.blocks[bname]['index_info'].append(ii)
-                
+
         else:
             core.log_warn('Weird hkagg_type: %i' % f['hkagg_type'],
                           unit='HKScanner')
@@ -665,7 +666,7 @@ def process_file_with_cache(self, filename):
             with open(path, 'wb') as pkfl:
                 pickle.dump(hksc, pkfl)
             if self.pre_proc_mode is not None:
-                os.chmod( path, self.pre_proc_mode )            
+                os.chmod( path, self.pre_proc_mode )
 
         self.field_groups += hksc.field_groups
         self.counter += hksc.counter
@@ -711,7 +712,7 @@ class _FieldGroup:
     def __init__(self, prefix, fields, start, end, index_info):
         self.prefix = prefix
         self.fields = list(fields)
-        self.cover = so3g.IntervalsDouble().add_interval(start, end)
+        self.cover = IntervalsDouble().add_interval(start, end)
         self.index_info = index_info
     def __repr__(self):
         try:
@@ -721,7 +722,7 @@ def __repr__(self):
             return '_FieldGroup(<bad internal state!>)'
 
 
-def to_timestamp(some_time, str_format=None): 
+def to_timestamp(some_time, str_format=None):
     """Convert the argument to a unix timestamp.
 
     Args:
@@ -736,7 +737,7 @@ def to_timestamp(some_time, str_format=None):
         float: Unix timestamp corresponding to some_time.
 
     """
-    
+
     if type(some_time) == dt.datetime:
         return some_time.astimezone(dt.timezone.utc).timestamp()
     if type(some_time) == int or type(some_time) == float:
@@ -751,10 +752,10 @@ def to_timestamp(some_time, str_format=None):
             except:
                 continue
         raise ValueError('Could not process string into date object, options are: {}'.format(str_options))
-        
+
     raise ValueError('Type of date / time indication is invalid, accepts datetime, int, float, and string')
 
-def load_range(start, stop, fields=None, alias=None, 
+def load_range(start, stop, fields=None, alias=None,
                data_dir=None, config=None, pre_proc_dir=None, pre_proc_mode=None,
                folder_patterns=None, strict=True):
     """Args:
@@ -827,7 +828,7 @@ def load_range(start, stop, fields=None, alias=None,
             hk_logger.warning('''load_range has a config file - data_dir, fields, and alias are ignored''')
         with open(config, 'r') as f:
             setup = yaml.load(f, Loader=yaml.FullLoader)
-        
+
         if 'data_dir' not in setup.keys():
             raise ValueError('load_range config file requires data_dir entry')
         data_dir = setup['data_dir']
@@ -838,14 +839,14 @@ def load_range(start, stop, fields=None, alias=None,
         for k in setup['field_list']:
             fields.append( setup['field_list'][k])
             alias.append( k )
-            
+
     if data_dir is None and 'OCS_DATA_DIR' not in os.environ.keys():
         raise ValueError('if $OCS_DATA_DIR is not defined a data directory must be passed to getdata')
     if data_dir is None:
         data_dir = os.environ['OCS_DATA_DIR']
 
     hk_logger.debug('Loading data from {}'.format(data_dir))
-    
+
     start_ctime = to_timestamp(start) - 3600
     stop_ctime = to_timestamp(stop) + 3600
 
@@ -884,13 +885,13 @@ def load_range(start, stop, fields=None, alias=None,
                 hk_logger.debug('Processing {}'.format(base+'/'+file))
                 hksc.process_file_with_cache( base+'/'+file)
 
-    
+
     cat = hksc.finalize()
     start_ctime = to_timestamp(start)
     stop_ctime = to_timestamp(stop)
-    
+
     all_fields,_ = cat.get_fields()
-    
+
     if fields is None:
         fields = all_fields
     if alias is not None:
@@ -898,7 +899,7 @@ def load_range(start, stop, fields=None, alias=None,
             hk_logger.error('if provided, alias needs to be the length of fields')
     else:
         alias = fields
-    
+
     # Single pass load.
     keepers = []
     for name, field in zip(alias, fields):
@@ -969,7 +970,7 @@ def load_range(start, stop, fields=None, alias=None,
     # This is the easy way, which just gives you one timeline per
     # requested field.
     x1, y1 = cat.simple(field_name)
-    
+
     assert np.all(np.array(x0) == x1) and np.all(np.array(y0) == y1)
 
     import pylab as pl
diff --git a/python/hk/scanner.py b/python/so3g/hk/scanner.py
similarity index 97%
rename from python/hk/scanner.py
rename to python/so3g/hk/scanner.py
index 9fabbfc1..99f3eb60 100644
--- a/python/hk/scanner.py
+++ b/python/so3g/hk/scanner.py
@@ -1,12 +1,13 @@
-import so3g
+
 from spt3g import core
 import numpy as np
 
-from so3g import hk
+from ..libso3g import HKFrameType
+
 
 class HKScanner:
     """Module that scans and reports on HK archive contents and compliance.
-    
+
     Attributes:
       stats (dict): A nested dictionary of statistics that are updated as
         frames are processed by the module.  Elements:
@@ -62,7 +63,7 @@ def __call__(self, f):
         vers = f.get('hkagg_version', 0)
         self.stats['versions'][vers] = self.stats['versions'].get(vers, 0) + 1
 
-        if f['hkagg_type'] == so3g.HKFrameType.session:
+        if f['hkagg_type'] == HKFrameType.session:
             session_id = f['session_id']
             if self.session_id is not None:
                 if self.session_id != session_id:
@@ -73,13 +74,13 @@ def __call__(self, f):
                 self.session_id = session_id
                 self.stats['n_session'] += 1
 
-        elif f['hkagg_type'] == so3g.HKFrameType.status:
+        elif f['hkagg_type'] == HKFrameType.status:
             # Have any providers disappeared?
             now_prov_id = [p['prov_id'].value for p in f['providers']]
             for p, info in self.providers.items():
                 if p not in now_prov_id:
                     info['active'] = False
-            
+
             # New providers?
             for p in now_prov_id:
                 info = self.providers.get(p)
@@ -102,7 +103,7 @@ def __call__(self, f):
                         'block_streams_map': {},  # Map from field name to block name.
                     }
 
-        elif f['hkagg_type'] == so3g.HKFrameType.data:
+        elif f['hkagg_type'] == HKFrameType.data:
             info = self.providers[f['prov_id']]
             vers = f.get('hkagg_version', 0)
 
@@ -178,7 +179,7 @@ def __call__(self, f):
                               'data timestamp vectors (%s) .' % (t_this, t_check),
                               unit='HKScanner')
                 self.stats['concerns']['n_warning'] += 1
-                
+
         else:
             core.log_warn('Weird hkagg_type: %i' % f['hkagg_type'],
                           unit='HKScanner')
diff --git a/python/hk/session.py b/python/so3g/hk/session.py
similarity index 96%
rename from python/hk/session.py
rename to python/so3g/hk/session.py
index f5b79986..48828550 100644
--- a/python/hk/session.py
+++ b/python/so3g/hk/session.py
@@ -1,9 +1,11 @@
-import so3g
+
 from spt3g import core
 import time
 import os
 import binascii
 
+from ..libso3g import HKFrameType
+
 
 class HKSessionHelper:
     def __init__(self, session_id=None, start_time=None, hkagg_version=None,
@@ -93,7 +95,7 @@ def session_frame(self):
         """
         f = core.G3Frame()
         f.type = core.G3FrameType.Housekeeping
-        f['hkagg_type'] = so3g.HKFrameType.session
+        f['hkagg_type'] = HKFrameType.session
         f['hkagg_version'] = self.hkagg_version
         f['session_id'] = self.session_id
         f['start_time'] = self.start_time
@@ -110,7 +112,7 @@ def status_frame(self, timestamp=None):
             timestamp = time.time()
         f = core.G3Frame()
         f.type = core.G3FrameType.Housekeeping
-        f['hkagg_type'] = so3g.HKFrameType.status
+        f['hkagg_type'] = HKFrameType.status
         f['hkagg_version'] = self.hkagg_version
         f['session_id'] = self.session_id
         f['timestamp'] = timestamp
@@ -136,7 +138,7 @@ def data_frame(self, prov_id, timestamp=None):
         f = core.G3Frame()
         f.type = core.G3FrameType.Housekeeping
         f['hkagg_version'] = self.hkagg_version
-        f['hkagg_type'] = so3g.HKFrameType.data
+        f['hkagg_type'] = HKFrameType.data
         f['session_id'] = self.session_id
         f['prov_id'] = prov_id
         f['timestamp'] = timestamp
diff --git a/python/hk/translator.py b/python/so3g/hk/translator.py
similarity index 96%
rename from python/hk/translator.py
rename to python/so3g/hk/translator.py
index d6c10741..efa89c1b 100644
--- a/python/hk/translator.py
+++ b/python/so3g/hk/translator.py
@@ -2,10 +2,11 @@
 
 """
 
-import so3g
-import so3g.hk
 from spt3g import core
 
+from ..libso3g import HKFrameType
+from .util import get_g3_time
+
 
 class HKTranslator:
     """Translates SO Housekeeping frames from schema versions {v0, v1} to
@@ -89,7 +90,7 @@ def Process(self, f):
         f['hkagg_version'] = self.target_version
 
         # No difference in Session/Status for v0, v1, v2.
-        if f.get('hkagg_type') != so3g.HKFrameType.data:
+        if f.get('hkagg_type') != HKFrameType.data:
             return [f]
 
         if self.target_version == 0:
@@ -103,7 +104,7 @@ def Process(self, f):
             # Now process the data blocks.
             for block in orig_blocks:
                 new_block = core.G3TimesampleMap()
-                new_block.times = so3g.hk.util.get_g3_time(block.t)
+                new_block.times = get_g3_time(block.t)
                 for k in block.data.keys():
                     v = block.data[k]
                     new_block[k] = core.G3VectorDouble(v)
diff --git a/python/hk/tree.py b/python/so3g/hk/tree.py
similarity index 98%
rename from python/hk/tree.py
rename to python/so3g/hk/tree.py
index 643519bb..1b324eb6 100644
--- a/python/hk/tree.py
+++ b/python/so3g/hk/tree.py
@@ -3,12 +3,13 @@
 
 """
 
-from so3g.hk import getdata
 import time
 import os
 import yaml
 import logging
 
+from .getdata import to_timestamp, HKArchiveScanner
+
 
 logger = logging.getLogger(__name__)
 
@@ -137,11 +138,11 @@ def __init__(self, start=None, stop=None, config=None,
         if start is None:
             start = now - 86400
         else:
-            start = getdata.to_timestamp(start)
+            start = to_timestamp(start)
         if stop is None:
             stop = start + 86400
         else:
-            stop = getdata.to_timestamp(stop)
+            stop = to_timestamp(stop)
 
         if aliases is None:
             aliases = {}
@@ -170,7 +171,7 @@ def __init__(self, start=None, stop=None, config=None,
 
         # Walk the files -- same approach as load_ranges
         logger.debug('Scanning %s (pre_proc=%s)' % (data_dir, pre_proc_dir))
-        hksc = getdata.HKArchiveScanner(pre_proc_dir=pre_proc_dir)
+        hksc = HKArchiveScanner(pre_proc_dir=pre_proc_dir)
         for folder in range(int(start / 1e5), int(stop / 1e5) + 1):
             base = os.path.join(data_dir, str(folder))
             logger.debug(f' ... checking {base}')
diff --git a/python/hk/util.py b/python/so3g/hk/util.py
similarity index 100%
rename from python/hk/util.py
rename to python/so3g/hk/util.py
diff --git a/python/proj/__init__.py b/python/so3g/proj/__init__.py
similarity index 96%
rename from python/proj/__init__.py
rename to python/so3g/proj/__init__.py
index d0a9790c..23fb3ebd 100644
--- a/python/proj/__init__.py
+++ b/python/so3g/proj/__init__.py
@@ -1,4 +1,6 @@
-import so3g
+
+import numpy as np
+
 from spt3g import core
 
 from . import quat
@@ -10,6 +12,5 @@
 from .weather import Weather, weather_factory
 from .ranges import Ranges, RangesMatrix
 
-import numpy as np
 
 DEG = np.pi/180.
diff --git a/python/proj/coords.py b/python/so3g/proj/coords.py
similarity index 99%
rename from python/proj/coords.py
rename to python/so3g/proj/coords.py
index 54b5d7d5..39cd24c5 100644
--- a/python/proj/coords.py
+++ b/python/so3g/proj/coords.py
@@ -1,11 +1,11 @@
-import so3g
-from . import quat
-from .weather import weather_factory
-
 from collections import OrderedDict
 
 import numpy as np
 
+from . import quat
+from .weather import weather_factory
+
+
 DEG = np.pi / 180.
 
 
diff --git a/python/proj/mapthreads.py b/python/so3g/proj/mapthreads.py
similarity index 93%
rename from python/proj/mapthreads.py
rename to python/so3g/proj/mapthreads.py
index 92e71f2e..915e1c9a 100644
--- a/python/proj/mapthreads.py
+++ b/python/so3g/proj/mapthreads.py
@@ -6,17 +6,26 @@
 
 """
 
-import so3g
 import numpy as np
 
+from ..libso3g import useful_info
+
+from . import quat
+from .coords import Assembly, FocalPlane
+from .wcs import Projectionist
+
+
+DEG = np.pi/180
+
+
 def get_num_threads(n_threads=None):
     """Utility function for computing n_threads.  If n_threads is not
     None, it is returned directly.  But if it is None, then the OpenMP
-    thread count is returned.  Uses so3g.useful_info().
+    thread count is returned.  Uses libso3g.useful_info().
 
     """
     if n_threads is None:
-        return so3g.useful_info()['omp_num_threads']
+        return useful_info()['omp_num_threads']
     return n_threads
 
 def get_threads_domdir(sight, fplane, shape, wcs, tile_shape=None,
@@ -77,11 +86,11 @@ def get_threads_domdir(sight, fplane, shape, wcs, tile_shape=None,
         active_tiles = [0]
 
     # The full assembly, for later.
-    asm_full = so3g.proj.Assembly.attach(sight, fplane)
+    asm_full = Assembly.attach(sight, fplane)
 
     # Get a Projectionist -- note it can be used with full or
     # representative assembly.
-    pmat = so3g.proj.wcs.Projectionist.for_tiled(
+    pmat = Projectionist.for_tiled(
         shape, wcs, tile_shape=tile_shape, active_tiles=active_tiles
     )
     if active_tiles is None:
@@ -93,9 +102,9 @@ def get_threads_domdir(sight, fplane, shape, wcs, tile_shape=None,
     # For the scan direction map, use the "representative" subset
     # detectors, with polarization direction aligned parallel to
     # elevation.
-    xi, eta, gamma = so3g.proj.quat.decompose_xieta(fplane_rep.quats)
-    fplane_xl = so3g.proj.FocalPlane.from_xieta(xi, eta, gamma*0+90*so3g.proj.DEG)
-    asm_rep = so3g.proj.Assembly.attach(sight, fplane_xl)
+    xi, eta, gamma = quat.decompose_xieta(fplane_rep.quats)
+    fplane_xl = FocalPlane.from_xieta(xi, eta, gamma*0+90*DEG)
+    asm_rep = Assembly.attach(sight, fplane_xl)
     sig = np.ones((fplane_xl.ndet, len(asm_rep.Q)), dtype='float32')
     scan_maps = pmat.to_map(sig, asm_rep, comps='TQU')
 
@@ -107,7 +116,7 @@ def get_threads_domdir(sight, fplane, shape, wcs, tile_shape=None,
     phi = np.arctan2(U, Q) / 2
 
     if plot_prefix:
-        text = 'Qf=%.2f Uf=%.2f phi=%.1f deg' % (Q/T, U/T, phi / so3g.proj.DEG)
+        text = 'Qf=%.2f Uf=%.2f phi=%.1f deg' % (Q/T, U/T, phi / DEG)
         for label, _m in tile_iter(scan_maps):
             for i in range(3):
                 pl.imshow(_m[i], origin='lower')
diff --git a/python/proj/quat.py b/python/so3g/proj/quat.py
similarity index 95%
rename from python/proj/quat.py
rename to python/so3g/proj/quat.py
index 634210fe..23775ed0 100644
--- a/python/proj/quat.py
+++ b/python/so3g/proj/quat.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from spt3g.core import quat, G3VectorQuat
+from spt3g.core import Quat, G3VectorQuat
 
 
 """We are using the spt3g quaternion containers,
@@ -15,19 +15,19 @@
 def euler(axis, angle):
     """
     The quaternion representing of an Euler rotation.
-    
+
     For example, if axis=2 the computed quaternion(s) will have
     components:
 
         q = (cos(angle/2), 0, 0, sin(angle/2))
-    
+
     Parameters
     ----------
     axis : {0, 1, 2}
         The index of the cartesian axis of the rotation (x, y, z).
     angle : float or 1-d float array
         Angle of rotation, in radians.
-    
+
     Returns
     -------
     quat or G3VectorQuat, depending on ndim(angle).
@@ -40,13 +40,13 @@ def euler(axis, angle):
     q[..., 0] = c
     q[..., axis+1] = s
     if len(shape) == 1:
-        return quat(*q)
+        return Quat(*q)
     return G3VectorQuat(q)
 
 
 def rotation_iso(theta, phi, psi=None):
     """Returns the quaternion that composes the Euler rotations:
-   
+
         Qz(phi) Qy(theta) Qz(psi)
 
     Note arguments are in radians.
@@ -59,9 +59,9 @@ def rotation_iso(theta, phi, psi=None):
 
 def rotation_lonlat(lon, lat, psi=0., azel=False):
     """Returns the quaternion that composes the Euler rotations:
-        
+
         Qz(lon) Qy(pi/2 - lat) Qz(psi)
-    
+
     Note the three angle arguments are in radians.
 
     If azel is True, then the sign of lon is flipped (as though lon
@@ -95,21 +95,21 @@ def rotation_xieta(xi, eta, gamma=0):
 def decompose_iso(q):
     """Decomposes the rotation encoded by q into the product of Euler
     rotations:
-    
+
         q = Qz(phi) Qy(theta) Qz(psi)
-    
+
     Parameters
     ----------
     q : quat or G3VectorQuat
         The quaternion(s) to be decomposed.
-    
+
     Returns
     -------
     (theta, phi, psi) : tuple of floats or of 1-d arrays
         The rotation angles, in radians.
     """
 
-    if isinstance(q, quat):
+    if isinstance(q, Quat):
         a,b,c,d = q.a, q.b, q.c, q.d
     else:
         a,b,c,d = np.transpose(q)
@@ -143,7 +143,7 @@ def decompose_xieta(q):
     gamma).
 
     """
-    if isinstance(q, quat):
+    if isinstance(q, Quat):
         a,b,c,d = q.a, q.b, q.c, q.d
     else:
         a,b,c,d = np.transpose(q)
diff --git a/python/proj/ranges.py b/python/so3g/proj/ranges.py
similarity index 99%
rename from python/proj/ranges.py
rename to python/so3g/proj/ranges.py
index 76e4febd..11eae004 100644
--- a/python/proj/ranges.py
+++ b/python/so3g/proj/ranges.py
@@ -1,11 +1,11 @@
-import so3g
+
 import numpy as np
 
 """Objects will self report as being of type "RangesInt32" rather than
 Ranges.  But let's try to use so3g.proj.Ranges when testing types and
 making new ones and stuff."""
 
-Ranges = so3g.RangesInt32
+from ..libso3g import RangesInt32 as Ranges
 
 
 class RangesMatrix():
@@ -44,7 +44,7 @@ def copy(self):
     def zeros_like(self):
         return RangesMatrix([x.zeros_like() for x in self.ranges],
                             child_shape=self.shape[1:])
-    
+
     def ones_like(self):
         return RangesMatrix([x.ones_like() for x in self.ranges],
                             child_shape=self.shape[1:])
@@ -53,7 +53,7 @@ def buffer(self, buff):
         [x.buffer(buff) for x in self.ranges]
         ## just to make this work like Ranges.buffer()
         return self
-    
+
     def buffered(self, buff):
         out = self.copy()
         [x.buffer(buff) for x in out.ranges]
@@ -128,7 +128,7 @@ def __add__(self, x):
                 elif self.shape[0] == x.shape[0]:
                     return self.__class__([r + d for r, d in zip(self.ranges, x)])
             return self.__class__([r + x for r in self.ranges])
-        
+
     def __mul__(self, x):
         if isinstance(x, Ranges):
             return self.__class__([d * x for d in self.ranges])
diff --git a/python/proj/util.py b/python/so3g/proj/util.py
similarity index 100%
rename from python/proj/util.py
rename to python/so3g/proj/util.py
diff --git a/python/proj/wcs.py b/python/so3g/proj/wcs.py
similarity index 99%
rename from python/proj/wcs.py
rename to python/so3g/proj/wcs.py
index 8c2005d3..f3c59a0c 100644
--- a/python/proj/wcs.py
+++ b/python/so3g/proj/wcs.py
@@ -1,8 +1,7 @@
-import so3g
-from . import quat
-
 import numpy as np
 
+from .. import libso3g
+from . import quat
 from .ranges import Ranges, RangesMatrix
 from . import mapthreads
 
@@ -128,7 +127,7 @@ def get_ProjEng(self, comps='TQU', proj_name=None, get=True,
         if not get:
             return projeng_name
         try:
-            projeng_cls = getattr(so3g, projeng_name)
+            projeng_cls = getattr(libso3g, projeng_name)
         except AttributeError:
             raise ValueError(f'There is no projector implemented for '
                              f'pixelization "{proj_name}", components '
@@ -445,7 +444,7 @@ def get_active_tiles(self, assembly, assign=False):
         tiles = np.nonzero(hits)[0]
         hits = hits[tiles]
         if assign is True:
-            assign = so3g.useful_info()['omp_num_threads']
+            assign = libso3g.useful_info()['omp_num_threads']
         if assign > 0:
             group_n = np.array([0 for g in range(assign)])
             group_tiles = [[] for _ in group_n]
@@ -771,7 +770,7 @@ def compute_nside_tile(self, assembly, nActivePerThread=5, nThreads=None):
             nActive = len(self.get_active_tiles(assembly)['active_tiles'])
             fsky = nActive / (12 * nside_tile0**2)
             if nThreads is None:
-                nThreads = so3g.useful_info()['omp_num_threads']
+                nThreads = libso3g.useful_info()['omp_num_threads']
             # nside_tile is smallest power of 2 satisfying nTile >= nActivePerThread * nthread / fsky
             self.nside_tile = int(2**np.ceil(0.5 * np.log2(nActivePerThread * nThreads / (12 * fsky))))
             self.nside_tile = min(self.nside_tile, self.nside)
diff --git a/python/proj/weather.py b/python/so3g/proj/weather.py
similarity index 100%
rename from python/proj/weather.py
rename to python/so3g/proj/weather.py
diff --git a/python/smurf/__init__.py b/python/so3g/smurf/__init__.py
similarity index 100%
rename from python/smurf/__init__.py
rename to python/so3g/smurf/__init__.py
diff --git a/python/smurf/reader.py b/python/so3g/smurf/reader.py
similarity index 99%
rename from python/smurf/reader.py
rename to python/so3g/smurf/reader.py
index 1565bbe0..3220f2c2 100644
--- a/python/smurf/reader.py
+++ b/python/so3g/smurf/reader.py
@@ -1,12 +1,14 @@
-import so3g
-from spt3g import core
-import numpy as np
 import pickle
 import datetime, time
 import sys, os
 import warnings
 import argparse
 
+import numpy as np
+
+from spt3g import core
+
+
 def g3_to_array(g3file, verbose=False):
     """
     Takes a G3 file output from the SMuRF archiver and reads to a numpy array.
@@ -22,7 +24,7 @@ def g3_to_array(g3file, verbose=False):
     data : array of arrays, where each internal array is a SMuRF channel
     """
     frames = [fr for fr in core.G3File(g3file)]
-    
+
     data=[]
 
     frametimes = []
@@ -35,13 +37,13 @@ def g3_to_array(g3file, verbose=False):
         warnings.warn('Wrong frame type')
 
     strtimes = np.hstack(frametimes)
-    
+
     times = []
     for strt in strtimes:
         t=core.G3Time(strt).time/core.G3Units.s
         times.append(t)
     times = np.asarray(times)
-    
+
     channums = []
 
     i=0
diff --git a/python/smurf/smurf_archive.py b/python/so3g/smurf/smurf_archive.py
similarity index 99%
rename from python/smurf/smurf_archive.py
rename to python/so3g/smurf/smurf_archive.py
index 6605f58f..52348923 100644
--- a/python/smurf/smurf_archive.py
+++ b/python/so3g/smurf/smurf_archive.py
@@ -1,18 +1,20 @@
+import datetime as dt
+import os
+import ast
+from collections import namedtuple
+from enum import Enum
+
+
+from tqdm import tqdm
+import numpy as np
+import yaml
+
 import sqlalchemy as db
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker, relationship, backref
 
 from spt3g import core
-import so3g
-import datetime as dt
-import os
-from tqdm import tqdm
-import numpy as np
-import yaml
-import ast
-from collections import namedtuple
-from enum import Enum
 
 
 Base = declarative_base()
@@ -164,7 +166,7 @@ def add_file(self, path, session):
         db_file = Files(path=path)
         session.add(db_file)
 
-        reader = so3g.G3Reader(path)
+        reader = core.G3Reader(path)
 
         total_channels = 0
         file_start, file_stop = None, None
@@ -326,7 +328,7 @@ def load_data(self, start, end, show_pb=True, load_biases=True):
         for frame_info in tqdm(frames, total=num_frames, disable=(not show_pb)):
             file = frame_info.file.path
             if file != cur_file:
-                reader = so3g.G3Reader(file)
+                reader = core.G3Reader(file)
                 cur_file = file
 
             reader.seek(frame_info.offset)
@@ -400,7 +402,7 @@ def load_status(self, time, show_pb=False):
         for frame_info in tqdm(status_frames.all(), disable=(not show_pb)):
             file = frame_info.file.path
             if file != cur_file:
-                reader = so3g.G3Reader(file)
+                reader = core.G3Reader(file)
                 cur_file = file
             reader.seek(frame_info.offset)
             frame = reader.Process(None)[0]
diff --git a/python/spt3g.py b/python/spt3g.py
deleted file mode 100644
index cddc2483..00000000
--- a/python/spt3g.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-This package simply provides a universal way to import spt3g, either from a
-bundled subpackage or from somewhere on the filesystem.
-"""
-
-import sys
-
-try:
-    from . import spt3g_internal
-    sys.modules["spt3g"] = sys.modules["so3g.spt3g_internal"]
-    sys.modules["spt3g"].__name__ = "spt3g"
-    del sys.modules["so3g.spt3g_internal"]
-    from spt3g import core, __version__, __file__
-except:
-    # Not bundled
-    try:
-        from spt3g import core, __version__, __file__
-    except:
-        raise ImportError("Cannot import either the internal or external spt3g!")
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 8bb4a75a..00000000
--- a/setup.py
+++ /dev/null
@@ -1,358 +0,0 @@
-# This setup.py file simply builds so3g using the underlying cmake build
-# system.  This is only preferred in certain cases where the automation is
-# easier from a setup.py (e.g. readthedocs, pip, etc).
-
-import os
-import sys
-import sysconfig
-import re
-import subprocess as sp
-import glob
-import shutil
-from pathlib import Path
-
-from setuptools import setup, Extension
-from setuptools.command.build_ext import build_ext
-
-import numpy as np
-
-# Absolute path to the directory with this file
-topdir = Path(__file__).resolve().parent
-
-# The version of spt3g we will be installing.  Get this from the
-# Dockerfile for consistency.
-def get_spt3g_version():
-    dockerfile = os.path.join(topdir, "Dockerfile")
-    ver = None
-    linepat = re.compile(r".*simonsobs/spt3g:(.*)\s*")
-    verpat = re.compile(r".*-g(.*)")
-    with open(dockerfile, "r") as f:
-        for line in f:
-            mat = linepat.match(line)
-            if mat is not None:
-                fullver = mat.group(1)
-                vermat = verpat.match(fullver)
-                if vermat is None:
-                    # This must be an actual tag
-                    ver = fullver
-                else:
-                    # Extract the short hash
-                    ver = vermat.group(1)
-    return ver
-
-upstream_spt3g_version = get_spt3g_version()
-
-# The name of the spt3g source and package dirs
-spt3g_src_dir = os.path.join(topdir, "spt3g_software")
-
-
-def get_version():
-    # Call the same python function used by cmake to get the version
-    ver = None
-    try:
-        sys.path.insert(0, os.path.abspath(topdir))
-        from version_h import get_versions as ver_function
-
-        ver_info = ver_function()
-        ver = ver_info["version"]
-        sys.path.pop(0)
-    except:
-        raise RuntimeError("Cannot call get_versions() from version_h.py!")
-    return ver
-
-
-# Define some helper functions to do the actual fetch / build / install
-
-
-def get_spt3g():
-    # We use git to get the repo, since spt3g uses git to get its version
-    # information.
-    if not os.path.isdir(spt3g_src_dir):
-        sp.check_call(
-            [
-                "git",
-                "clone",
-                "https://github.com/CMB-S4/spt3g_software",
-                spt3g_src_dir,
-            ]
-        )
-        sp.check_call(
-            [
-                "git",
-                "-C",
-                spt3g_src_dir,
-                "checkout",
-                "-b",
-                upstream_spt3g_version,
-                upstream_spt3g_version,
-            ]
-        )
-        # Apply patches with any changes
-        patches = glob.glob(f"{topdir}/wheels/spt3g*.patch")
-        for patch_file in patches:
-            if os.path.isfile(patch_file):
-                start_dir = os.getcwd()
-                os.chdir(spt3g_src_dir)
-                sp.check_call(["patch", "-p1", "-i", patch_file])
-                os.chdir(start_dir)
-
-
-def extract_cmake_env(varprefix):
-    cmake_opts = list()
-    cpat = re.compile(f"{varprefix}_(.*)")
-    for k, v in os.environ.items():
-        mat = cpat.match(k)
-        if mat is not None:
-            cmake_opts.append(f"-D{mat.group(1)}={v}")
-    return cmake_opts
-
-
-def build_common(src_dir, build_dir, install_dir, cmake_extra, debug, pkg, version):
-    cmake_args = list()
-    cfg = "Debug" if debug else "Release"
-    cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg]
-    cmake_args += ["-DCMAKE_VERBOSE_MAKEFILE=ON"]
-    cmake_args += [f"-DCMAKE_INSTALL_PREFIX={install_dir}"]
-    cmake_args.extend(extract_cmake_env(f"{pkg}_BUILD"))
-    cmake_args.extend(cmake_extra)
-
-    build_args = ["--config", cfg]
-
-    # Make a copy of the environment so that we can modify it
-    env = os.environ.copy()
-
-    ccomp = env.get("CC", None)
-    cxxcomp = env.get("CXX", None)
-    cflags = env.get("CFLAGS", None)
-    cxxflags = env.get("CXXFLAGS", "")
-    cxxflags = f"{cxxflags} -DVERSION_INFO='{version}'"
-    if sys.platform.lower() == "darwin":
-        cmake_args += ["-DCMAKE_SHARED_LINKER_FLAGS='-undefined dynamic_lookup'"]
-    
-    # Add numpy includes
-    numpy_inc = np.get_include()
-    cxxflags += f" -I{numpy_inc}"
-
-    env["CXXFLAGS"] = cxxflags
-
-    if ccomp is not None:
-        cmake_args += [f"-DCMAKE_C_COMPILER={ccomp}"]
-    if cxxcomp is not None:
-        cmake_args += [f"-DCMAKE_CXX_COMPILER={cxxcomp}"]
-    if cflags is not None:
-        cmake_args += [f"-DCMAKE_C_FLAGS={cflags}"]
-    cmake_args += [f"-DCMAKE_CXX_FLAGS={cxxflags}"]
-
-    if not os.path.exists(build_dir):
-        os.makedirs(build_dir)
-
-    # CMakeLists.txt is in the source dir
-    cmake_list_dir = os.path.abspath(src_dir)
-    print("-" * 10, f"Running {pkg} CMake", "-" * 40)
-    print(f"cmake {cmake_list_dir} {' '.join(cmake_args)}")
-    sp.check_call(["cmake", cmake_list_dir] + cmake_args, cwd=build_dir, env=env)
-
-    make_j = 2
-    if "CPU_COUNT" in os.environ:
-        make_j = int(os.environ["CPU_COUNT"])
-    print("-" * 10, f"Building {pkg}", "-" * 40)
-    cmake_cmd = ["cmake", "--build", "."] + build_args + ["--", f"-j{make_j}"]
-    sp.check_call(cmake_cmd, cwd=build_dir)
-    cmake_cmd = ["cmake", "--install", "."] + build_args
-    sp.check_call(cmake_cmd, cwd=build_dir)
-
-
-def build_spt3g(src_dir, build_dir, install_dir, cmake_extra, debug):
-    # Build spt3g with cmake, using any customizations passed through
-    # environment variables named SPT3G_BUILD_*.  For example, the value
-    # of "SPT3G_BUILD_BLAH" is passed to cmake as "-DBLAH=<value>".
-    build_common(
-        src_dir, build_dir, install_dir, cmake_extra, debug, "SPT3G", upstream_spt3g_version
-    )
-
-
-def build_so3g(src_dir, build_dir, install_dir, cmake_extra, debug):
-    # Build so3g with cmake, using any customizations passed through
-    # environment variables named SO3G_BUILD_*.  For example, the value
-    # of "SO3G_BUILD_BLAH" is passed to cmake as "-DBLAH=<value>".
-    build_common(src_dir, build_dir, install_dir, cmake_extra, debug, "SO3G", get_version())
-
-
-# The spt3g directory needs to be in place before we start.
-get_spt3g()
-
-
-class CMakeExtension(Extension):
-    """
-    This overrides the built-in extension class and essentially does nothing,
-    since all extensions are compiled in one go by the custom build_ext class.
-    """
-
-    def __init__(self, name, sources=[]):
-        super().__init__(name=name, sources=sources)
-
-
-class CMakeBuild(build_ext):
-    """
-    Builds the full package using CMake.
-    """
-
-    def initialize_options(self):
-        super().initialize_options()
-        self.cmake_build_done = False
-
-    def run(self):
-        """
-        Perform build_cmake before doing the 'normal' stuff
-        """
-        for extension in self.extensions:
-            if extension.name == "so3g.libso3g":
-                # We just trigger this on one of the extensions.  build_cmake()
-                # will actually build everything.
-                self.build_cmake()
-        # We DO NOT want to run the base class method.  It will try to copy files
-        # to places that we don't want.
-        # super().run()
-
-    def build_cmake(self):
-        if self.cmake_build_done:
-            return
-        try:
-            out = sp.check_output(["cmake", "--version"])
-        except OSError:
-            raise RuntimeError(
-                "CMake must be installed to build the following extensions: "
-                + ", ".join(e.name for e in self.extensions)
-            )
-
-        # Path to build/temp.<arch>
-        temp_build = Path(self.build_temp).resolve()
-
-        # CMake build directory for so3g
-        temp_so3g = os.path.join(temp_build, "so3g")
-
-        # CMake build directory for spt3g
-        temp_spt3g = os.path.join(temp_build, "spt3g")
-
-        # The python module in the spt3g build directory.  This contains
-        # the compiled libraries and symlinks to the python source.
-        spt3g_python_dir = os.path.join(temp_spt3g, "spt3g")
-
-        # Use CMake to install to the distutils build location
-        install_so3g = os.path.dirname(
-            Path(self.get_ext_fullpath("so3g.libso3g")).resolve().parents[0]
-        )
-
-        # Fake install directory passed to spt3g cmake.
-        install_spt3g_fake = os.path.join(temp_build, "spt3g_install")
-
-        # The cmake python discovery can be fragile.  Here we override some
-        # artifacts explicitly.
-        py_exe = sys.executable
-        py_maj = sys.version_info[0]
-        py_min = sys.version_info[1]
-        # The includes vary slightly between builds and versions, so we call out
-        # to the python-config script for this.
-        out = sp.check_output(
-            ["python3-config", "--includes"],
-            universal_newlines=True,
-        )
-        raw_incl = out.split()[0]
-        py_incl = re.sub("-I", "", raw_incl)
-        dlist3g = [
-            f"-DPython_EXECUTABLE={py_exe}",
-            f"-DPython_INCLUDE_DIRS={py_incl}",
-            "-DPython_LIBRARIES=''",
-            "-DPython_RUNTIME_LIBRARY_DIRS=''",
-            "-DPython_LIBRARY_DIRS=''",
-            f"-DPython_VERSION_MAJOR={py_maj}",
-            f"-DPython_VERSION_MINOR={py_min}",
-            "-DBoost_ARCHITECTURE=-x64",
-            f"-DBoost_PYTHON_TYPE=python{py_maj}{py_min}",
-        ]
-        if "BOOST_ROOT" in os.environ:
-            dlist3g.append(f"-DBOOST_ROOT={os.environ['BOOST_ROOT']}")
-        if "FLAC_ROOT" in os.environ:
-            # The spt3g package uses a custom FindFLAC.cmake, while so3g uses
-            # the built-in one.  Override the spt3g detection.
-            flcroot = os.environ["FLAC_ROOT"]
-            flcext = "so"
-            if sys.platform.lower() == "darwin":
-                flcext = "dylib"
-            dlist3g.extend(
-                [
-                    f"-DFLAC_LIBRARIES={flcroot}/lib/libFLAC.{flcext}",
-                    f"-DFLAC_INCLUDE_DIR={flcroot}/include",
-                    "-DFLAC_FOUND=1",
-                ]
-            )
-
-        build_spt3g(
-            spt3g_src_dir,
-            temp_spt3g,
-            install_spt3g_fake,
-            dlist3g,
-            self.debug,
-        )
-
-        build_so3g(
-            topdir,
-            temp_so3g,
-            install_so3g,
-            [
-                f"-DPYTHON_INSTALL_DEST={install_so3g}",
-                f"-DCMAKE_PREFIX_PATH={install_spt3g_fake}",
-            ],
-            self.debug,
-        )
-
-        # Move spt3g python directory into place.  Remove any stale copy of the
-        # directory.
-        install_spt3g_internal = os.path.join(install_so3g, "so3g", "spt3g_internal")
-        if os.path.isdir(install_spt3g_internal):
-            print(f"rm stale: {install_spt3g_internal}")
-            shutil.rmtree(install_spt3g_internal)
-        print(f"copy {spt3g_python_dir}, {install_spt3g_internal}")
-        shutil.copytree(spt3g_python_dir, install_spt3g_internal, symlinks=False)
-
-        self.cmake_build_done = True
-
-
-ext_modules = [
-    CMakeExtension("so3g.libso3g"),
-    CMakeExtension("so3g.spt3g_internal.libspt3g-core"),
-    CMakeExtension("so3g.spt3g_internal.libspt3g-dfmux"),
-    CMakeExtension("so3g.spt3g_internal.libspt3g-calibration"),
-    CMakeExtension("so3g.spt3g_internal.libspt3g-gcp"),
-    CMakeExtension("so3g.spt3g_internal.libspt3g-maps"),
-]
-
-# Install the python scripts from spt3g
-raw_scripts = glob.glob(os.path.join(spt3g_src_dir, "*", "bin", "*"))
-scripts = [x.removeprefix(f"{topdir}/") for x in raw_scripts]
-
-conf = dict()
-conf["name"] = "so3g"
-conf["version"] = get_version()
-
-# Since the so3g python package is in a directory called "python", we can't use the
-# normal find_packages() function to recursively set these up.  Instead we specify them
-# manually.
-
-conf["packages"] = ["so3g",]
-conf["package_dir"] = {
-    "so3g": "python",
-}
-
-for sub in ["hk", "proj", "smurf"]:
-    psub = f"so3g.{sub}"
-    pdir = os.path.join("python", sub)
-    conf["packages"].append(psub)
-    conf["package_dir"][psub] = pdir
-
-conf["ext_modules"] = ext_modules
-conf["scripts"] = scripts
-conf["cmdclass"] = {"build_ext": CMakeBuild}
-conf["zip_safe"] = False
-
-setup(**conf)
diff --git a/src/Butterworth.cxx b/src/Butterworth.cxx
index 040eceaf..a39d5250 100644
--- a/src/Butterworth.cxx
+++ b/src/Butterworth.cxx
@@ -1,14 +1,14 @@
 #include <assert.h>
 #include <math.h>
 
-#include <pybindings.h>
-#include <container_pybindings.h>
-
 #include "Butterworth.h"
 #include "exceptions.h"
 
 using namespace std;
 
+namespace nb = nanobind;
+
+
 BFilterBank::BFilterBank(const BFilterBank& a) {
     // Copy the parameters but reset the accumulators... that's probably evil.
     for (auto p: a.par)
@@ -34,8 +34,8 @@ BFilterBank& BFilterBank::init(int n_chan) {
     return *this;
 }
 
-void BFilterBank::apply_buffer(boost::python::object input,
-                               boost::python::object output)
+void BFilterBank::apply_buffer(nb::object input,
+                               nb::object output)
 {
     // User wrappers so we can throw exceptions and the view will be
     // released in destructor.
@@ -124,16 +124,20 @@ void BFilterBank::apply_to_float(float *input, float *output, float unit, int n_
 }
 
 
-PYBINDINGS("so3g")
-{
-    bp::class_<BFilterParams>("BFilterParams",
-                              bp::init<int32_t, int32_t, int, int, int>() );
-
-    bp::class_<BFilterBank>("BFilterBank")
-        .def("add", &BFilterBank::add,
-             bp::return_internal_reference<>() )
-        .def("init", &BFilterBank::init,
-             bp::return_internal_reference<>() )
-        .def("apply", &BFilterBank::apply_buffer);
-}
+void register_butterworth(nb::module_ & m) {
+    nb::class_<BFilterParams>(m, "BFilterParams")
+    .def(nb::init<int32_t, int32_t, int, int, int>())
+    .def_rw("b0", &BFilterParams::b0)
+    .def_rw("b1", &BFilterParams::b1)
+    .def_rw("b_bits", &BFilterParams::b_bits)
+    .def_rw("p_bits", &BFilterParams::p_bits)
+    .def_rw("shift", &BFilterParams::shift);
+
+    nb::class_<BFilterBank>(m, "BFilterBank")
+    .def(nb::init<>())
+    .def("add", &BFilterBank::add, nb::rv_policy::none)
+    .def("init", &BFilterBank::init, nb::rv_policy::none)
+    .def("apply", &BFilterBank::apply_buffer);
 
+    return;
+}
diff --git a/src/G3SuperTimestream.cxx b/src/G3SuperTimestream.cxx
deleted file mode 100644
index f69228bd..00000000
--- a/src/G3SuperTimestream.cxx
+++ /dev/null
@@ -1,1236 +0,0 @@
-#define NO_IMPORT_ARRAY
-
-#include <pybindings.h>
-#include <iostream>
-#include <boost/python.hpp>
-
-#include <container_pybindings.h>
-#include <G3SuperTimestream.h>
-#include <cereal/types/utility.hpp>
-
-#ifdef _OPENMP
-# include <omp.h>
-#endif // ifdef _OPENMP
-#include <FLAC/stream_encoder.h>
-#include <bzlib.h>
-
-
-// Debugging variables for compressors.
-// BZ2_VERBOSITY can range from 0 (silent) to 4.
-#define SO3G_BZ2_VERBOSITY 0
-#define SO3G_BZ2_BLOCKSIZE 5
-
-static
-std::string get_bz2_error_string(int err) {
-	std::ostringstream s;
-	switch(err) {
-	case BZ_CONFIG_ERROR:
-		s << "BZ_CONFIG_ERROR (library compilation issue)";
-		break;
-	case BZ_PARAM_ERROR:
-		s <<  "BZ_PARAM_ERROR (bad blocksize, verbosity, etc)";
-		break;
-	case BZ_MEM_ERROR:
-		s << "BZ_MEM_ERROR (not enough memory is available)";
-		break;
-	case BZ_OUTBUFF_FULL:
-		s << "BZ_OUTBUFF_FULL (compressed data too long for buffer)";
-		break;
-	case BZ_OK:
-		s << "BZ_OK (no problem)";
-		break;
-	default:
-		s << "Unknown BZ error code " << err;
-	}
-	return s.str();
-}
-
-static
-std::string get_algo_error_string(std::string var_name, int algo_code)
-{
-	std::ostringstream s;
-	s << "No support for compression algorithm " << var_name << "=" << algo_code;
-	return s.str();
-}
-
-// Split each datum in x into:
-//
-//    x[i] = r[i] + y[i]
-//
-// where r is a multiple of snap_to, -step_at <= y[i] < step_at.  The
-// algorithm tries to have r[i] change slowly, meaning that r[i+1]
-// will not differ from r[i] unless necessary to satisfy the
-// range restriction on y[i].
-//
-// This is only implemented for snap_to and step_at both powers of 2!
-//
-// Let's allow r and x to point to same memory.
-//
-// This will return the number of values that y takes; 0 if there are
-// no data, 1 if it's a constant value (even if that value is 0), or
-// more than 1 for more than 1.
-template <typename T>
-static
-int rebranch(int32_t *y, T *r, T *x, int n_samps, T snap_to, T step_at)
-{
-	T branch = 0;
-        int branch_count = 0;
-	int fails = 0;
-	for (int i=0; i < n_samps; i++) {
-		T new_branch = (x[i] + snap_to / 2) & ~(snap_to - 1);
-		if (new_branch - branch >= step_at ||
-		    new_branch - branch <  -step_at ||
-		    branch_count == 0) {
-			branch = new_branch;
-                        branch_count++;
-                }
-		y[i] = x[i] - branch;
-		// Don't update r until you use x -- they are allowed to overlap.
-		r[i] = branch;
-		if (y[i] < -step_at || y[i] >= step_at)
-			fails++;
-	}
-	return branch_count;
-}
-
-FLAC__StreamEncoderWriteStatus flac_encoder_write_cb(
-	const FLAC__StreamEncoder *encoder,
-	const FLAC__byte buffer[],
-	size_t bytes,
-	unsigned samples,
-	unsigned current_frame,
-	void *client_data)
-{
-	auto ablob = (struct G3SuperTimestream::array_blob *)client_data;
-	if (ablob->count + bytes > ablob->size)
-		return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR;
-	memcpy(ablob->buf + ablob->count, buffer, bytes);
-	ablob->count += bytes;
-	return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
-}
-
-inline int32_t* reserve_size_field(struct G3SuperTimestream::array_blob *ablob)
-{
-	if (ablob->size - ablob->count < sizeof(int32_t))
-		return nullptr;
-	auto p = (int32_t*)(ablob->buf + ablob->count);
-	ablob->count += sizeof(int32_t);
-	*p = ablob->count;
-	return p;
-}
-inline bool close_size_field(struct G3SuperTimestream::array_blob *ablob, int32_t *dest)
-{
-	if (dest == NULL)
-		return false;
-	*dest = ablob->count - *dest;
-	return true;
-}
-
-static
-struct G3SuperTimestream::array_blob encode_array(
-	PyArrayObject *array, std::vector<double> quanta,
-	G3SuperTimestream::options_type options)
-{
-	struct G3SuperTimestream::array_blob ablob;
-
-	// We will write (possibly) compressed data for n_det
-	// detectors by n samples into a buffer.  The data block for
-	// detector i starts at offsets[i] and ends at offsets[i+1].
-	//
-	// Each data block has the structure:
-	//   [data_block] = [algo_code] [encoded_block1] [encoded_block2]
-	//
-	// The algo_code is a single byte drawing values from the
-	// algos enum/bitmask that describes what encoded blocks will
-	// follow.
-	//
-	// If algo_code=0 (a.k.a. ALGO_NONE) then there is one
-	// encoded_block and it is simply the flat uncompressed binary
-	// data for that channel.
-	//
-	// If algo_code != 0, then the encoded blocks will consist
-	// first of a FLAC block (if algo_code & ALGO_DO_FLAC),
-	// followed by a BZ2 block (if ALGO_DO_BZ) or a CONST block
-	// (if ALGO_DO_CONST).
-	//
-	// The FLAC block has the format [length] [flac_data].  The
-	// length is an int32_t giving the number of bytes in
-	// flac_data.  The flac_data is the encoding of n samples of
-	// single-channel 24-bit data.
-	//
-	// The BZ2 block has the format [length] [bz2_data].  The
-	// length is an int32_t giving the number of bytes in
-	// bz2_data.  The bz2_data is the encoding of n samples of the
-	// full-width data (i.e. n*sizeof(dtype) bytes).
-	//
-	// The CONST block is a single full-width value, [datum].
-	// This value represents an offset to add to the data.
-	//
-	// The FLAC data, if present, decode to int32_t.  The BZ2
-	// data, if present, decode to int32_t or int64_t.  The CONST
-	// datum, if present, represents a single int32_t or int64_t.
-	// These two vectors and single offset are added together to
-	// form the output integer array.
-	//
-	// (In practice we will only have one or the other of BZ2 and
-	// CONST blocks.  In the common case that CONST would decode
-	// to 0, it will not be included at all.)
-
-	int n_chans = PyArray_SHAPE(array)[0];
-	int n_samps = PyArray_SHAPE(array)[1];
-	int itemsize = PyArray_ITEMSIZE(array);
-
-	// Max bytes needed to store all the "compressed" data.
-	int n_max = PyArray_NBYTES(array) + n_chans;
-
-	// Initialize the buffer.
-	ablob.buf = new char[n_max];
-	ablob.size = n_max;
-	ablob.count = 0;
-	ablob.offsets.push_back(0);
-
-	int32_t d[n_samps];
-	const int32_t *chan_ptrs[1] = {d};
-
-	char r[n_samps * sizeof(int64_t)];
-	auto r32 = (int32_t*)r;
-	auto r64 = (int64_t*)r;
-
-	npy_intp type_num = PyArray_TYPE(array);
-	char *src = (char*)(PyArray_DATA(array));
-
-	int32_t M = (1 << 24);
-	for (int i=0; i<n_chans; i++, src+=PyArray_STRIDES(array)[0]) {
-		// Save the current ablob.buf pointer in case we need to
-		// roll back the compression, which we will do if we
-		// end up writing to block_limit or beyond.
-		int block_start = ablob.count++;
-		int block_limit = ablob.count + n_samps * itemsize;
-
-		// Oh, the things we'll try.
-		bool try_flac = (options.data_algo & G3SuperTimestream::ALGO_DO_FLAC);
-		bool try_bz = (options.data_algo & G3SuperTimestream::ALGO_DO_BZ);
-		bool try_const = false;
-
-		int8_t algo_used = 0;
-		bool ok = true;
-
-		if (try_flac) {
-			int32_t *flac_size = reserve_size_field(&ablob);
-			int branches = 0;
-			if (type_num == NPY_INT32) {
-				branches = rebranch<int32_t>(d, r32, (int32_t*)src, n_samps, M, M/2);
-			} else if (type_num == NPY_INT64) {
-				branches = rebranch<int64_t>(d, r64, (int64_t*)src, n_samps, M, M/2);
-			} else if (type_num == NPY_FLOAT32) {
-				for (int j=0; j < n_samps; j++)
-					r32[j] = roundf(((float*)src)[j] / quanta[i]);
-				branches = rebranch<int32_t>(d, r32, r32, n_samps, M, M/2);
-			} else if (type_num == NPY_FLOAT64) {
-				for (int j=0; j < n_samps; j++)
-					r64[j] = round(((double*)src)[j] / quanta[i]);
-				branches = rebranch<int64_t>(d, r64, r64, n_samps, M, M/2);
-			} else
-				throw g3supertimestream_exception("Invalid array type encountered.");
-
-			// Re-using this encoder for all
-			// detectors... seems to not work if process
-			// or finish exit with failure.
-			auto encoder = FLAC__stream_encoder_new();
-			FLAC__stream_encoder_set_channels(encoder, 1);
-			FLAC__stream_encoder_set_bits_per_sample(encoder, 24);
-			FLAC__stream_encoder_set_compression_level(encoder, options.flac_level);
-			FLAC__stream_encoder_init_stream(
-				encoder, flac_encoder_write_cb, NULL, NULL, NULL, (void*)(&ablob));
-
-			// If encoding fails, check that it looks like
-			// a simple buffer-to-small error, then continue.
-			int err1;
-			if (!FLAC__stream_encoder_process(encoder, chan_ptrs, n_samps) ||
-			    !FLAC__stream_encoder_finish(encoder)) {
-				auto state = FLAC__stream_encoder_get_state(encoder);
-				if (state != FLAC__STREAM_ENCODER_CLIENT_ERROR) {
-					const char *estr = FLAC__stream_encoder_get_resolved_state_string(encoder);
-					std::ostringstream s;
-					s << "Unexpected FLAC encoder error: " << estr
-					  << " on channel " << i << " (" << n_samps << " samples)";
-					throw g3supertimestream_exception(s.str());
-				}
-				// Discard any partial encoding ... let bzip have a go.
-				ablob.count = block_start + 1;
-                        } else {
-				// Great, update FLAC block size and record it.
-				ok = close_size_field(&ablob, flac_size);
-				algo_used |= G3SuperTimestream::ALGO_DO_FLAC;
-
-				// Const remainder?
-				if (branches <= 1) {
-					try_bz = false;
-					try_const = true;
-				}
-			}
-			FLAC__stream_encoder_delete(encoder);
-		}
-
-		if (!(algo_used & G3SuperTimestream::ALGO_DO_FLAC)) {
-			// Just copy the raw data into the r buffer,
-			// where the bz code will pick it up.
-			memcpy(r, src, n_samps * itemsize);
-			if (type_num == NPY_FLOAT32) {
-				for (int j=0; j<n_samps; j++)
-					((int32_t*)r)[j] = roundf(((float*)r)[j] / quanta[i]);
-			} else if (type_num == NPY_FLOAT64) {
-				for (int j=0; j<n_samps; j++)
-					((int64_t*)r)[j] = round(((double*)r)[j] / quanta[i]);
-			}
-		}
-
-		if (ok && try_const) {
-			// Note "try_const" means "the data are
-			// constant, it's ok to treat them as such".
-			// The code below doesn't check the
-			// assumption.
-			//
-			// Take care in the cute handling of special
-			// case r[0] == 0.  If you naively leave out
-			// the ALGO_DO_CONST bit, and if FLAC
-			// compression was disabled, then you end up
-			// storing algo=0, which the decoder will
-			// interpret as raw format, not zero-byte
-			// ultracompression representing all zeros!
-			int n_copy = 0;
-			switch (type_num) {
-			case NPY_FLOAT32:
-			case NPY_INT32:
-				if (r32[0] != 0 || algo_used == 0)
-					n_copy = sizeof(int32_t);
-				break;
-			case NPY_FLOAT64:
-			case NPY_INT64:
-				if (r64[0] != 0 || algo_used == 0)
-					n_copy = sizeof(int64_t);
-				break;
-			}
-			if (ablob.count + n_copy < block_limit) {
-				if (n_copy > 0) {
-					memcpy(ablob.buf + ablob.count, r, n_copy);
-					ablob.count += n_copy;
-					algo_used |= G3SuperTimestream::ALGO_DO_CONST;
-				}
-			} else {
-				ok = false;
-			}
-		}
-		if (ok && try_bz) {
-			int32_t *bz_size = reserve_size_field(&ablob);
-			algo_used |= G3SuperTimestream::ALGO_DO_BZ;
-			if (ablob.count < block_limit) {
-				unsigned int n_write = block_limit - ablob.count;
-				int err = BZ2_bzBuffToBuffCompress(
-					ablob.buf + ablob.count, &n_write, r, n_samps * itemsize,
-					SO3G_BZ2_BLOCKSIZE, SO3G_BZ2_VERBOSITY, options.bz2_workFactor);
-				if (err == BZ_OUTBUFF_FULL) {
-					// Too long, don't bother.
-					ok = false;
-				} else if (err != BZ_OK) {
-					throw g3supertimestream_exception(get_bz2_error_string(err));
-				} else {
-					ablob.count += n_write;
-					ok = close_size_field(&ablob, bz_size);
-				}
-			} else
-				ok = false;
-		}
-
-		if (ok && ablob.count < block_limit) {
-			// That went well.
-			ablob.buf[block_start] = algo_used;
-		} else {
-			// Bail out into raw copy.
-			ablob.buf[block_start] = G3SuperTimestream::ALGO_NONE;
-			memcpy(ablob.buf + block_start + 1, src, n_samps * itemsize);
-			ablob.count = block_limit;
-		}
-		ablob.offsets.push_back(ablob.count);
-	}
-
-	return ablob;
-}
-
-
-/* G3SuperTimestream */
-
-std::string G3SuperTimestream::Description() const
-{
-	std::ostringstream s;
-	s << "G3SuperTimestream("
-	  << names.size() << ", " << times.size() << ")";
-	return s.str();
-}
-
-std::string G3SuperTimestream::Summary() const
-{
-	return Description();
-}
-
-template <class A> void G3SuperTimestream::load(A &ar, unsigned v)
-{
-	G3_CHECK_VERSION(v);
-	using namespace cereal;
-	ar & make_nvp("parent", base_class<G3FrameObject>(this));
-
-        // Compression options.
-	ar & make_nvp("flac_level", options.flac_level);
-	ar & make_nvp("bz2_workFactor", options.bz2_workFactor);
-
-	ar & make_nvp("times_algo", options.times_algo);
-	if (options.times_algo == ALGO_NONE) {
-		ar & make_nvp("times", times);
-	} else if (options.times_algo == ALGO_DO_BZ) {
-		int n_samps;
-		unsigned int max_bytes;
-		ar & make_nvp("n_samps", n_samps);
-		ar & make_nvp("comp_bytes", max_bytes);
-
-                char _buf[max_bytes];
-                char *buf = (char*)_buf;
-		ar & make_nvp("times_data", binary_data(buf, max_bytes));
-
-		std::vector<int64_t> ints(n_samps);
-		unsigned int n_decomp = n_samps * sizeof(ints[0]);
-		int err = BZ2_bzBuffToBuffDecompress(
-                    (char*)&ints[0], &n_decomp, buf, max_bytes,
-			     1, SO3G_BZ2_VERBOSITY);
-		if (err != BZ_OK)
-			throw g3supertimestream_exception(get_bz2_error_string(err));
-		times = G3VectorTime(ints.begin(), ints.end());
-	} else
-		throw g3supertimestream_exception(
-			get_algo_error_string("times_algo", options.times_algo));
-
-	ar & make_nvp("names", names);
-
-	// Read the desc.
-        ar & make_nvp("type_num", desc.type_num);
-        ar & make_nvp("ndim", desc.ndim);
-	ar & make_nvp("shape", desc.shape);
-        ar & make_nvp("nbytes", desc.nbytes);
-
-	ar & make_nvp("data_algo", options.data_algo);
-	if (options.data_algo == ALGO_NONE) {
-		array = (PyArrayObject*)
-			PyArray_SimpleNew(desc.ndim, desc.shape, desc.type_num);
-		ar & make_nvp("data_raw", binary_data((char*)PyArray_DATA(array),
-						      PyArray_NBYTES(array)));
-	} else {
-		// Read the flacblock
-		ablob = new struct array_blob;
-		ar & make_nvp("quanta", quanta);
-		ar & make_nvp("offsets", ablob->offsets);
-		ar & make_nvp("payload_bytes", ablob->count);
-		ablob->buf = new char[ablob->count];
-		ar & make_nvp("payload", binary_data(ablob->buf, ablob->count));
-	}
-	dataful = true;
-	float_mode = (desc.type_num == NPY_FLOAT32 ||
-		      desc.type_num == NPY_FLOAT64);
-}
-
-template <class A> void G3SuperTimestream::save(A &ar, unsigned v) const
-{
-	using namespace cereal;
-	ar & make_nvp("parent", base_class<G3FrameObject>(this));
-
-        // Compression options.
-	ar & make_nvp("flac_level", options.flac_level);
-	ar & make_nvp("bz2_workFactor", options.bz2_workFactor);
-
-	if (options.times_algo == ALGO_DO_BZ && times.size() > 0) {
-		// Try to bz2 compress.  Convert to a vector of int64_t first.
-		// Note this doesn't run unless n_samps > 0.
-		auto time_ints = vector<long>(times.begin(), times.end());
-		int n_samps = time_ints.size();
-		unsigned int max_bytes = n_samps * sizeof(time_ints[0]);
-
-                char _buf[max_bytes];
-		char *buf = _buf;
-
-		int err = BZ2_bzBuffToBuffCompress(
-			buf, &max_bytes, (char*)&time_ints[0], max_bytes,
-			SO3G_BZ2_BLOCKSIZE, SO3G_BZ2_VERBOSITY, options.bz2_workFactor);
-		if (err == BZ_OUTBUFF_FULL) {
-			// Fallback to no-compression.
-			ar & make_nvp("times_algo", (int8_t)ALGO_NONE);
-			ar & make_nvp("times", times);
-		} else if (err != BZ_OK) {
-			throw g3supertimestream_exception(get_bz2_error_string(err));
-		} else {
-			ar & make_nvp("times_algo", (int8_t)ALGO_DO_BZ);
-			ar & make_nvp("n_samps", n_samps);
-			ar & make_nvp("comp_bytes", max_bytes);
-			ar & make_nvp("times_data", binary_data(buf, max_bytes));
-		}
-	} else {
-		ar & make_nvp("times_algo", (int8_t)ALGO_NONE);
-		ar & make_nvp("times", times);
-	}
-
-	ar & make_nvp("names", names);
-
-	// Write the desc.
-        ar & make_nvp("type_num", desc.type_num);
-        ar & make_nvp("ndim", desc.ndim);
-	ar & make_nvp("shape", desc.shape);
-        ar & make_nvp("nbytes", desc.nbytes);
-
-	ar & make_nvp("data_algo", options.data_algo);
-	if (options.data_algo == ALGO_NONE) {
-		if (array == nullptr)
-			throw g3supertimestream_exception(
-				"Unexpected state: array is NULL.");
-		// Check the endianness
-		//auto this_descr = PyArray_DESCR(array);
-		if (!PyArray_EquivByteorders(PyArray_DESCR(array)->byteorder, NPY_LITTLE))
-			throw g3supertimestream_exception(
-				"The byte_order of the data array is not acceptable.");
-		// Might as well use numpy to repack it properly...
-		PyArrayObject *contig = PyArray_GETCONTIGUOUS(array);
-		ar & make_nvp("data_raw", binary_data((char*)PyArray_DATA(contig),
-						      PyArray_NBYTES(contig)));
-		Py_DECREF((PyObject*)contig);
-	} else {
-		struct array_blob *_ablob = ablob;
-		if (_ablob == nullptr) {
-			// Encode to a copy.
-			_ablob = new struct array_blob;
-			*_ablob = encode_array(array, quanta, options);
-		}
-
-		// Write the ablobblock
-		ar & make_nvp("quanta", quanta);
-		ar & make_nvp("offsets", _ablob->offsets);
-		ar & make_nvp("payload_bytes", _ablob->count);
-		ar & make_nvp("payload", binary_data(_ablob->buf, _ablob->count));
-
-		if (_ablob != ablob) {
-			delete _ablob->buf;
-			delete _ablob;
-		}
-	}
-}
-
-bool G3SuperTimestream::Encode() {
-	if (array == nullptr)
-		return false;
-
-	// Compress the array data.
-	if (options.data_algo == ALGO_NONE)
-		return false;
-	else {
-		ablob = new struct array_blob;
-		*ablob = encode_array(array, quanta, options);
-		Py_XDECREF(array);
-		array = nullptr;
-	}
-	return true;
-}
-
-struct flac_helper {
-	int bytes_remaining;
-	char *src;
-	char *dest;
-	int start;
-	int count;
-};
-
-FLAC__StreamDecoderReadStatus read_callback(
-	const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data)
-{
-	auto fh = (struct flac_helper *)client_data;
-	/* printf(" ... read %i (remaining: %i)\n", *bytes, fh->bytes_remaining); */
-	if (fh->bytes_remaining == 0) {
-		*bytes = 0;
-		return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
-	}
-	if (fh->bytes_remaining < *bytes)
-		*bytes = fh->bytes_remaining;
-	memcpy(buffer, fh->src, *bytes);
-	fh->bytes_remaining -= *bytes;
-	fh->src += *bytes;
-	return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
-}
-
-template <typename T>
-FLAC__StreamDecoderWriteStatus write_callback_int(
-	const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 *const buffer[], void *client_data)
-{
-	auto fh = (struct flac_helper *)client_data;
-	int n = frame->header.blocksize;
-	int drop = fh->start;
-	if (drop >= n) {
-		fh->start -= n;
-	} else {
-		n -= drop;
-		fh->start = 0;
-		if (n > fh->count)
-			n = fh->count;
-		for (int i=0; i<n; i++)
-			((T*)fh->dest)[i] = buffer[0][i+drop];
-		fh->dest += n * sizeof(T);
-		fh->count -= n;
-	}
-	return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
-}
-
-static void flac_decoder_error_cb(const FLAC__StreamDecoder *decoder,
-				  FLAC__StreamDecoderErrorStatus status, void *client_data)
-{
-
-	switch (status) {
-	case FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC:
-		printf("FLAC decoding error (lost sync)");
-	case FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER:
-		printf("FLAC decoding error (bad header)");
-	case FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH:
-		printf("FLAC decoding error (CRC mismatch)");
-	case FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM:
-		printf("FLAC decoding error (unparseable stream)");
-	default:
-		printf("FLAC decoding error (%d)", status);
-	}
-}
-
-template <typename T>
-void expand_branch(struct flac_helper *fh, int n_bytes, char *temp)
-{
-	bool own_temp = (temp == nullptr);
-	unsigned int temp_size = n_bytes;
-
-	int err = BZ2_bzBuffToBuffDecompress(
-		temp, &temp_size, fh->src, n_bytes,
-		1, SO3G_BZ2_VERBOSITY);
-	if (err != BZ_OK)
-		throw g3supertimestream_exception(get_bz2_error_string(err));
-	// Add it in ...
-	for (int i=0; i<fh->count; i++)
-		((T*)fh->dest)[i] += ((T*)temp)[i + fh->start];
-}
-
-template <typename T>
-void broadcast_val(struct flac_helper *fh, int nsamps)
-{
-	T val = *((T*)(fh->src));
-	T *dest = (T*)fh->dest;
-	for (int i=0; i<nsamps; i++)
-		dest[i] += val;
-}
-
-inline int32_t _read_size(struct flac_helper *fh)
-{
-	auto v = *((int32_t*)(fh->src));
-	fh->src += sizeof(v);
-	return v;
-}
-
-bool G3SuperTimestream::Decode()
-{
-	if (ablob == nullptr)
-		return false;
-
-	if (options.data_algo == ALGO_NONE)
-		throw g3supertimestream_exception(
-			"Decode called with ablob buffer but data_algo=0.");
-
-	array = (PyArrayObject*)
-		PyArray_ZEROS(desc.ndim, desc.shape, desc.type_num, 0);
-
-	Extract(bp::object(bp::handle<>(bp::borrowed(reinterpret_cast<PyObject*>(array)))),
-		bp::object(), bp::object(), 0, desc.shape[1]);
-
-	// Destroy the flac bundle.
-	delete ablob->buf;
-	delete ablob;
-	ablob = nullptr;
-
-	return true;
-}
-
-bool G3SuperTimestream::Extract(
-	bp::object dest, bp::object dest_indices, bp::object src_indices,
-	int start, int stop)
-{
-	int n_det_ex = desc.shape[0];
-
-	PyArrayObject *_dest = (PyArrayObject*)dest.ptr();
-
-        auto _src_indices = BufferWrapper<int64_t>("src_indices", src_indices, true,
-                                                   vector<int>{-1});
-        if (_src_indices->obj != NULL)
-            n_det_ex = _src_indices->shape[0];
-
-        auto _dest_indices = BufferWrapper<int64_t>("dest_indices", dest_indices, true,
-                                                   vector<int>{n_det_ex});
-
-	// Sample index.
-	if (start < 0 || start > desc.shape[1])
-		throw g3supertimestream_exception(
-                        "sample start index out of bounds");
-	if (stop < 0)
-		stop = desc.shape[1];
-	if (stop < start || stop > desc.shape[1])
-		throw g3supertimestream_exception(
-                        "sample stop index out of bounds");
-
-	int copy_count = stop - start;
-
-	if (!PyArray_Check(_dest))
-		throw g3supertimestream_exception(
-			"Destination array must be ndarray.");
-	if (PyArray_TYPE(_dest) != desc.type_num)
-		throw g3supertimestream_exception(
-			"Destination array not of correct type.");
-	if ((PyArray_NDIM(_dest) != 2) ||
-	    (PyArray_SHAPE(_dest)[0] < n_det_ex) ||
-	    (PyArray_SHAPE(_dest)[1] != copy_count))
-		throw g3supertimestream_exception(
-			"Destination array does not have correct shape.");
-	if (PyArray_STRIDE(_dest, 1) != PyArray_ITEMSIZE(_dest))
-		throw g3supertimestream_exception(
-			"Destination array should be strictly packed on last dimension.");
-
-	// Insist array is not already decompressed.
-	if (ablob == nullptr)
-		throw g3supertimestream_exception(
-			"These data are already fully decoded; use .data.");
-
-	// Decompress or copy into a buffer.
-	FLAC__StreamDecoderWriteCallback this_write_callback;
-	void (*expand_func)(struct flac_helper *, int, char*) = nullptr;
-	void (*broadcast_func)(struct flac_helper *, int) = nullptr;
-	int elsize = 0;
-
-	switch (desc.type_num) {
-	case NPY_INT32:
-	case NPY_FLOAT32:
-		this_write_callback = &write_callback_int<int32_t>;
-		expand_func = expand_branch<int32_t>;
-		broadcast_func = broadcast_val<int32_t>;
-		elsize = sizeof(int32_t);
-		break;
-	case NPY_INT64:
-	case NPY_FLOAT64:
-		this_write_callback = &write_callback_int<int64_t>;
-		expand_func = expand_branch<int64_t>;
-		broadcast_func = broadcast_val<int64_t>;
-		elsize = sizeof(int64_t);
-		break;
-	default:
-		throw g3supertimestream_exception("Invalid array type encountered.");
-	}
-
-#pragma omp parallel
-	{
-
-	// Each OMP thread needs its own workspace, FLAC decoder, and helper structure
-	char temp[desc.shape[1] * elsize + 1];
-	FLAC__StreamDecoder *decoder = nullptr;
-	struct flac_helper helper;
-
-#pragma omp for
-	for (int i=0; i<n_det_ex; i++) {
-		// Source vector index
-		int src_i = i;
-		if (_src_indices.test()) {
-			src_i = *_src_indices.ptr_1d(i);
-			if (src_i < 0 || src_i >= desc.shape[0])
-				continue;
-		}
-
-		// Dest vector index
-		int dest_i = i;
-		if (_dest_indices.test()) {
-			dest_i = *_dest_indices.ptr_1d(i);
-			if (dest_i < 0 || dest_i >= PyArray_SHAPE(_dest)[0])
-				continue;
-		}
-
-		char* this_data = (char*)PyArray_DATA(_dest) + PyArray_STRIDES(_dest)[0]*dest_i;
-
-		// Cue up this detector's data and read the algo code.
-		helper.src = ablob->buf + ablob->offsets[src_i];
-		int8_t algo = *(helper.src++);
-
-		if (algo == ALGO_NONE) {
-			memcpy(this_data, helper.src + start * elsize, copy_count * elsize);
-		}
-		if (algo & ALGO_DO_FLAC) {
-			if (decoder == nullptr)
-				decoder = FLAC__stream_decoder_new();
-			helper.bytes_remaining = _read_size(&helper);
-			helper.dest = this_data;
-			helper.start = start;
-			helper.count = copy_count;
-
-			FLAC__stream_decoder_init_stream(
-				decoder, read_callback, NULL, NULL, NULL, NULL,
-				*this_write_callback, NULL, flac_decoder_error_cb,
-				(void*)&helper);
-			FLAC__stream_decoder_process_until_end_of_stream(decoder);
-			FLAC__stream_decoder_finish(decoder);
-		}
-
-		// A bz2 field of slow offsets?
-		if (algo & ALGO_DO_BZ) {
-			helper.bytes_remaining = _read_size(&helper);
-			helper.dest = this_data;
-			helper.start = start;
-			helper.count = copy_count;
-			expand_func(&helper, desc.shape[1] * elsize, (char*)temp);
-		}
-
-		// Single flat offset?
-		if (algo & ALGO_DO_CONST) {
-			helper.dest = this_data;
-			broadcast_func(&helper, copy_count);
-		}
-
-		// Now convert for precision.
-		if (desc.type_num == NPY_FLOAT32) {
-			auto src = (int32_t*)this_data;
-			auto dest = (float*)this_data;
-			for (int j=0; j<PyArray_SHAPE(_dest)[1]; j++)
-				dest[j] = (float)src[j] * quanta[src_i];
-		} else if (desc.type_num == NPY_FLOAT64) {
-			auto src = (int64_t*)this_data;
-			auto dest = (double*)this_data;
-			for (int j=0; j<PyArray_SHAPE(_dest)[1]; j++)
-				dest[j] = src[j] * quanta[src_i];
-		}
-	}
-	if (decoder != nullptr)
-		FLAC__stream_decoder_delete(decoder);
-	} // omp parallel
-
-	return true;
-}
-
-/* Options()
-
-   Note that enable=[0,1] is not overriding -- it's instead more of a
-   shorthand for the default values of data_algo and times_algo.
-*/
-
-int G3SuperTimestream::Options(
-	int enable, int flac_level, int bz2_workFactor,
-	int data_algo, int times_algo)
-{
-	// Global enable / disable first.
-	if (enable == 0) {
-		options.data_algo = 0;
-		options.times_algo = 0;
-	} else if (enable > 0) {
-		options.data_algo = ALGO_DO_FLAC | ALGO_DO_BZ;
-		options.times_algo = ALGO_DO_BZ;
-	}
-
-	if (data_algo >= 0)
-		options.data_algo = data_algo;
-	if (times_algo >= 0)
-		options.times_algo = times_algo;
-
-	if (flac_level >= 0)
-		options.flac_level = flac_level;
-	if (bz2_workFactor >= 0)
-		options.bz2_workFactor = bz2_workFactor;
-
-	return 0;
-}
-
-
-template <typename T>
-static
-void _apply_cals_typed(PyArrayObject *array, std::vector<double> cals)
-{
-	for (int i=0; i<PyArray_SHAPE(array)[0]; i++) {
-		auto dest = (T*)((char*)PyArray_DATA(array) + PyArray_STRIDES(array)[0] * i);
-		for (int j=0; j<PyArray_SHAPE(array)[1]; j++)
-			dest[j] *= cals[i];
-	}
-}
-
-void _apply_cals(PyArrayObject *array, std::vector<double> cals)
-{
-	switch (PyArray_TYPE(array)) {
-	case NPY_FLOAT32:
-		_apply_cals_typed<float>(array, cals);
-		break;
-	case NPY_FLOAT64:
-		_apply_cals_typed<double>(array, cals);
-		break;
-	default:
-		throw g3supertimestream_exception("Unexpected dtype!");
-	}
-}
-
-void G3SuperTimestream::Calibrate(std::vector<double> rescale)
-{
-	if (rescale.size() != names.size())
-		throw g3supertimestream_exception(
-			"Rescale vector has unexpected length.");
-	if (float_mode) {
-		// Modification to the calibration.
-		if (array)
-			_apply_cals(array, rescale);
-		for (int i=0; i<quanta.size(); i++)
-			quanta[i] *= rescale[i];
-	} else {
-		// Transition to float_mode.  If holding integer
-		// array, convert it.
-		if (dataful) {
-			switch(desc.type_num) {
-			case NPY_INT32:
-				desc.type_num = NPY_FLOAT32;
-				break;
-			case NPY_INT64:
-				desc.type_num = NPY_FLOAT64;
-				break;
-			default:
-				throw g3supertimestream_exception("Unexpected dtype!");
-			}
-			if (array) {
-				auto new_array = (PyArrayObject*)PyArray_FromAny(
-					(PyObject*)array, PyArray_DescrFromType(desc.type_num),
-					0, 0, NPY_ARRAY_FORCECAST, NULL);
-				if (new_array == nullptr)
-					throw g3supertimestream_exception(
-						"Failed to allocate float array.");
-				_apply_cals(new_array, rescale);
-				Py_DECREF(array);
-				array = new_array;
-			}
-		}
-		float_mode = true;
-		quanta = rescale;
-	}
-}
-
-G3SuperTimestream::G3SuperTimestream()
-{
-	array = nullptr;
-	ablob = nullptr;
-	float_mode = false;
-	dataful = false;
-	if (!PyArray_EquivByteorders(NPY_NATIVE, NPY_LITTLE))
-		throw g3supertimestream_exception(
-			"This class hasn't been trained on BIG-endian machines.!");
-
-	// Set defaults.
-	Options(1, 1, 30, -1, -1);
-}
-
-G3SuperTimestream::~G3SuperTimestream()
-{
-	if (array != nullptr)
-		Py_XDECREF(array);
-	if (ablob != nullptr) {
-		delete ablob->buf;
-		delete ablob;
-	}
-}
-
-G3SuperTimestream::G3SuperTimestream(
-	const G3VectorString &names_, const G3VectorTime &times_)
-	: G3SuperTimestream()
-{
-	names = G3VectorString(names_);
-	times = G3VectorTime(times_);
-}
-
-static
-void safe_set_data(G3SuperTimestream &self, const bp::object object_in);
-static
-void safe_set_quanta(G3SuperTimestream &self, std::vector<double> quanta);
-
-G3SuperTimestream::G3SuperTimestream(
-	const G3VectorString &names_, const G3VectorTime &times_,
-	const bp::object &data_)
-	: G3SuperTimestream(names_, times_)
-{
-	safe_set_data(*this, data_);
-}
-
-G3SuperTimestream::G3SuperTimestream(
-	const G3VectorString &names_, const G3VectorTime &times_,
-	const bp::object &data_, const std::vector<double> &quanta_)
-	: G3SuperTimestream(names_, times_)
-{
-	safe_set_quanta(*this, quanta_);
-	safe_set_data(*this, data_);
-}
-
-static
-void safe_set_times(G3SuperTimestream &self, G3VectorTime _times)
-{
-	// Only allow this if it doesn't upset consistency.  We will
-	// assume that, coming in, we're internally consistent.
-	if (_times.size() != self.times.size() && self.times.size() != 0) {
-		std::ostringstream s;
-		s << "Cannot set .times because it conflicts with "
-		  << "the established number of samples (" << self.times.size()
-		  << ").";
-		throw g3supertimestream_exception(s.str());
-	}
-	self.times = _times;
-}
-
-static
-void safe_set_names(G3SuperTimestream &self, G3VectorString _names)
-{
-	// Only allow this if it doesn't upset consistency.  We will
-	// assume that, coming in, we're internally consistent.
-	if (_names.size() != self.names.size() && self.names.size() != 0) {
-		std::ostringstream s;
-		s << "Cannot set .names because it conflicts with "
-		  << "the established number of channels (" << self.names.size()
-		  << ").";
-		throw g3supertimestream_exception(s.str());
-	}
-	self.names = _names;
-}
-
-static
-void safe_set_data(G3SuperTimestream &self, const bp::object object_in)
-{
-	// Note this function, as invoked here, might return a
-	// reference or create a new array.
-	PyObject *ob = PyArray_FromAny(object_in.ptr(), NULL, 0, 0, 0, NULL);
-	if (ob == NULL)
-		throw g3supertimestream_exception("Could not decode array.");
-
-	PyArrayObject *_array = reinterpret_cast<PyArrayObject*>(ob);
-
-	if (PyArray_NDIM(_array) != 2) {
-		Py_XDECREF(ob);
-		throw g3supertimestream_exception("Bad ndim.");
-	}
-	if (PyArray_DIMS(_array)[0] != self.names.size()) {
-		Py_XDECREF(ob);
-		throw g3supertimestream_exception("Bad shape[0].");
-	}
-	if (PyArray_DIMS(_array)[1] != self.times.size()) {
-		Py_XDECREF(ob);
-		throw g3supertimestream_exception("Bad shape[1].");
-	}
-	if (!PyArray_EquivByteorders(PyArray_DESCR(_array)->byteorder, NPY_LITTLE)) {
-		//There are other ways to deal with endianness.
-		Py_XDECREF(ob);
-		throw g3supertimestream_exception("Bad endianness.");
-	}
-
-	bool is_floaty = false;
-	switch(PyArray_TYPE(_array)) {
-	case NPY_FLOAT32:
-	case NPY_FLOAT64:
-		is_floaty = true;
-		break;
-	case NPY_INT32:
-	case NPY_INT64:
-		break;
-	default:
-		Py_XDECREF(ob);
-		throw g3supertimestream_exception("Forbidden dtype.");
-	}
-
-	if (is_floaty) {
-		// quanta has to be set already.
-		if (self.quanta.size() != PyArray_DIMS(_array)[0])
-			throw g3supertimestream_exception(
-				"User must set .quanta before loading float array.");
-	} else {
-		if (self.quanta.size() != 0)
-			throw g3supertimestream_exception(
-				"The .quanta must be empty when loading integer array.");
-	}
-
-	// Clear cached array or compressed data.
-	if (self.array) {
-		Py_XDECREF((PyObject*)self.array);
-		self.array = nullptr;
-	}
-	if (self.ablob) {
-		delete self.ablob->buf;
-		delete self.ablob;
-		self.ablob = nullptr;
-	}
-
-	self.dataful = true;
-	self.float_mode = is_floaty;
-
-	self.desc.ndim = PyArray_NDIM(_array);
-	self.desc.type_num = PyArray_TYPE(_array);
-
-	self.desc.nbytes = PyArray_NBYTES(_array);
-	for (int i=0; i<self.desc.ndim; i++)
-		self.desc.shape[i] = PyArray_DIMS(_array)[i];
-
-	self.array = _array;
-}
-
-static
-bp::object safe_get_data(G3SuperTimestream &self)
-{
-	if (self.array == nullptr)
-		self.Decode();
-	return bp::object(bp::handle<>(bp::borrowed(reinterpret_cast<PyObject*>(self.array))));
-}
-
-static
-bp::object safe_get_dtype(G3SuperTimestream &self)
-{
-	PyObject *d;
-	if (self.array == nullptr) {
-		d = reinterpret_cast<PyObject*>(
-			PyArray_DescrFromType(self.desc.type_num));
-	} else {
-		d = reinterpret_cast<PyObject*>(PyArray_DESCR(self.array));
-		Py_XINCREF(d);
-	}
-	return bp::object(bp::handle<>(d));
-}
-
-static
-bp::object safe_get_quanta(G3SuperTimestream &self)
-{
-	if (!self.float_mode)
-		return bp::object();
-
-	npy_intp shape[1] = {(npy_intp)self.quanta.size()};
-	auto output = (PyArrayObject *)PyArray_SimpleNew(1, shape, NPY_FLOAT64);
-	memcpy(PyArray_DATA(output), &self.quanta[0], shape[0] * sizeof(self.quanta[0]));
-	return bp::object(bp::handle<>(reinterpret_cast<PyObject*>(output)));
-}
-
-static
-void safe_set_quanta(G3SuperTimestream &self, std::vector<double> quanta)
-{
-	// Only allowed to set quanta directly if data is not present.
-	if (!self.dataful)
-		self.Calibrate(quanta);
-	else
-		throw g3supertimestream_exception(
-			"The .quanta cannot be set directly once .data is set.  Use .calibrate().");
-}
-
-//Copies data out of a flat buffer, creating the numpy array along the way.
-bool G3SuperTimestream::SetDataFromBuffer(void* buf, int ndim, int shape[], int typenum,
-					 std::pair<int,int> sample_range)
-{
-	if (ndim != 2)
-		throw g3supertimestream_exception(
-			"2d arrays only please");
-
-	// Create a new numpy array for this, allowing for slice in
-	// second dimension..
-	int n_samps = sample_range.second - sample_range.first;
-	npy_intp shape_[2] = {shape[0], n_samps};
-	auto array_ = (PyArrayObject*)PyArray_EMPTY(ndim, shape_, typenum, 0);
-	bp::object array_ob =
-		bp::object(bp::handle<>((reinterpret_cast<PyObject*>(array_))));
-
-	for (int i=0; i<shape[0]; i++) {
-		memcpy((char*)PyArray_DATA(array_) + PyArray_STRIDES(array_)[0] * i,
-		       (char*)buf + (sample_range.first + shape[1] * i) * PyArray_ITEMSIZE(array_),
-		       PyArray_STRIDES(array_)[1] * n_samps);
-	}
-
-	safe_set_data(*this, array_ob);
-	return true;
-}
-
-
-// Assist with testing the pure C++ interface
-static
-G3SuperTimestreamPtr test_cxx_interface(int nsamps, int first, int second)
-{
-	int shape[2] = {3, nsamps};
-	int typenum = NPY_INT32;
-	std::vector<int32_t> buf(shape[0] * shape[1]);
-	std::fill(buf.begin(), buf.end(), 0);
-
-	auto times = G3VectorTime();
-	for (int i=first; i<second; i++)
-		times.push_back(G3Time::Now());
-	const char *chans_[] = {"a", "b", "c"};
-	auto chans = G3VectorString(chans_, std::end(chans_));
-
-	auto ts = G3SuperTimestreamPtr(
-		new G3SuperTimestream(chans, times));
-
-	for (int i=first; i<second; i++)
-		buf[i] = 77;
-
-	ts->SetDataFromBuffer((void*)buf.data(), 2, shape, typenum,
-			      std::pair<int,int>(first, second));
-
-	return ts;
-}
-
-
-G3_SPLIT_SERIALIZABLE_CODE(G3SuperTimestream);
-
-static void translate_ValueError(g3supertimestream_exception const& e)
-{
-	PyErr_SetString(PyExc_ValueError, e.msg_for_python().c_str());
-}
-
-
-PYBINDINGS("so3g")
-{
-	bp::docstring_options local_docstring_options(true, false);
-
-	EXPORT_FRAMEOBJECT(G3SuperTimestream, init<>(), "G3SuperTimestream()\n\n"
-		"  Construct with names and times uninitialized.")
-		.def(bp::init<const G3VectorString&, const G3VectorTime&>(
-			     "G3SuperTimestream(names, times)\n"
-			     "  Construct with names and times initialized."))
-		.def(bp::init<const G3VectorString&, const G3VectorTime&,
-		     bp::object&>(
-			     "G3SuperTimestream(names, times, data)\n\n"
-			     "  Construct with integer data."))
-		.def(bp::init<const G3VectorString&, const G3VectorTime&,
-		     bp::object&, const std::vector<double>&>(
-			     "G3SuperTimestream(names, times, data, quanta)\n\n"
-			     "  Construct with float data."))
-		.add_property("times", &G3SuperTimestream::times, &safe_set_times,
-			      "Vector of timestamps (G3VectorTime).")
-		.add_property("names", &G3SuperTimestream::names, &safe_set_names,
-			      "Vector of channel names.")
-		.add_property("data", &safe_get_data, &safe_set_data,
-			      "Data array.")
-		.add_property("quanta", &safe_get_quanta, &safe_set_quanta,
-			      "Quanta (if float mode).")
-		.add_property("dtype", &safe_get_dtype, "Numpy dtype of enclosed array.")
-		.def("encode", &G3SuperTimestream::Encode, "Compress.")
-		.def("decode", &G3SuperTimestream::Decode, "Decompress.")
-		.def("extract", &G3SuperTimestream::Extract,
-		     (bp::arg("dest"), bp::arg("dest_indices")=bp::object(),
-		      bp::arg("src_indices")=bp::object(),
-		      bp::arg("start")=0, bp::arg("stop")=-1),
-		     "Decompress data subset into an array.")
-		.def("calibrate", &G3SuperTimestream::Calibrate,
-		     "calibrate(cal_factors)\n\n"
-                     "Apply per-channel scale factors.  Note this puts you in float mode\n"
-		     "(if you're not already) and modifies both .quanta and .data.")
-		.def("options", &G3SuperTimestream::Options,
-		     (bp::arg("enable")=-1,
-		      bp::arg("flac_level")=-1, bp::arg("bz2_workFactor")=-1,
-		      bp::arg("data_algo")=-1, bp::arg("times_algo")=-1),
-		     "Set compression options.  To disable compression, pass enable=0.  To "
-		     "enable default compression options, pass enable=1.  Otherwise, pass "
-		     "specific values for data_algo and times_algo (see enums in C++ layer).")
-		;
-	register_pointer_conversions<G3SuperTimestream>();
-
-	bp::register_exception_translator<g3supertimestream_exception>(&translate_ValueError);
-	bp::def("test_g3super", test_cxx_interface);
-}
diff --git a/src/Intervals.cxx b/src/Intervals.cxx
index ee0e9297..dcb2f351 100644
--- a/src/Intervals.cxx
+++ b/src/Intervals.cxx
@@ -1,12 +1,14 @@
 #define NO_IMPORT_ARRAY
 
-#include <pybindings.h>
-
 #include <iostream>
 #include <limits>
 #include <type_traits>
 
-#include <boost/python.hpp>
+#include <nanobind/nanobind.h>
+#include <nanobind/operators.h>
+#include <nanobind/stl/tuple.h>
+#include <nanobind/stl/string.h>
+#include <nanobind/ndarray.h>
 
 #include "so3g_numpy.h"
 
@@ -14,6 +16,9 @@
 #include "exceptions.h"
 #include <exception>
 
+namespace nb = nanobind;
+
+
 //
 // Default constructors, explicitly defined for each type, to set a
 // sensible (perhaps) default domain.
@@ -35,19 +40,14 @@ Intervals<int32_t>::Intervals() {
     domain = make_pair(INT32_MIN, INT32_MAX);
 }
 
-// The G3Time internal encoding is an int64 with the number of 100 MHz
-// ticks since the unix epoch.  Make our default domain span from a
-// while ago to a while from now.
-
-#define G3TIME_LO                   0LL  // Jan 1 1970
-#define G3TIME_HI  725811840000000000LL  // Jan 1 2200
-
-template <>
-Intervals<G3Time>::Intervals() {
-    domain = make_pair(G3Time(G3TIME_LO), G3Time(G3TIME_HI));
+template <typename T>
+Intervals<T>::Intervals(Intervals<T> const & other) {
+    std::cerr << "Copy constructor input = " << other.Description() << std::endl;
+    domain = other.domain;
+    segments = other.segments;
+    std::cerr << "Copy constructor output = " << Description() << std::endl;
 }
 
-
 //
 // Some support templates for Description() -- these are broadly
 // applicable so consider having them live more publicly.
@@ -63,8 +63,6 @@ template <>
 const char *_ival_type_name<int64_t>() { return "Int"; }
 template <>
 const char *_ival_type_name<double> () { return "Double"; }
-template <>
-const char *_ival_type_name<G3Time> () { return "Time"; }
 
 // _ival_cute_lim() allows standard limits (such as INT32_MAX) to be printed as such.
 
@@ -147,20 +145,34 @@ void Intervals<T>::cleanup()
 template <typename T>
 Intervals<T>& Intervals<T>::add_interval(const T start, const T end)
 {
+    ostringstream dbg;
+    dbg << "DBG add_interval (" << start << "," << end << ")";
+    std::cerr << dbg.str() << std::endl;
     // We can optimize this later.  For now just do something that is
     // obviously correct.
     auto p = lower_bound(segments.begin(), segments.end(), make_pair(start, end));
     segments.insert(p, make_pair(start, end));
     cleanup();
 
+    dbg.str("");
+    dbg << "DBG segments size now " << segments.size();
+    std::cerr << dbg.str() << std::endl;
+
     return *this;
 }
 
 template <typename T>
 Intervals<T>& Intervals<T>::append_interval_no_check(const T start, const T end)
 {
+    ostringstream dbg;
+    dbg << "DBG add_interval_no_check (" << start << "," << end << ")";
+    std::cerr << dbg.str() << std::endl;
     segments.push_back(make_pair(start, end));
 
+    dbg.str("");
+    dbg << "DBG segments size now " << segments.size();
+    std::cerr << dbg.str() << std::endl;
+
     return *this;
 }
 
@@ -207,13 +219,6 @@ pair<T,T> interval_pair(char *p1, char *p2) {
                      *reinterpret_cast<T*>(p2));
 }
 
-template <>
-inline
-pair<G3Time,G3Time> interval_pair(char *p1, char *p2) {
-    return make_pair(G3Time(*reinterpret_cast<G3TimeStamp*>(p1)),
-                     G3Time(*reinterpret_cast<G3TimeStamp*>(p2)));
-}
-
 
 template <typename T>
 static inline
@@ -224,15 +229,6 @@ int interval_extract(const std::pair<T,T> *src, char *dest) {
     return 2 * sizeof(*Tdest);
 }
 
-template <>
-inline
-int interval_extract(const std::pair<G3Time,G3Time> *src, char *dest) {
-    auto Tdest = reinterpret_cast<G3TimeStamp*>(dest);
-    *(Tdest) = src->first.time;
-    *(Tdest+1) = src->second.time;
-    return 2 * sizeof(*Tdest);
-}
-
 template <typename T>
 static inline int get_dtype() {
     return NPY_NOTYPE;
@@ -253,11 +249,6 @@ inline int get_dtype<double>() {
     return NPY_FLOAT64;
 }
 
-template <>
-inline int get_dtype<G3Time>() {
-    return NPY_INT64;
-}
-
 template <typename T>
 static int format_to_dtype(const BufferWrapper<T> &view)
 {
@@ -300,43 +291,57 @@ static int format_to_dtype(const BufferWrapper<T> &view)
         case 8:
             return NPY_FLOAT64;
         }
-    } 
+    }
 
     return NPY_NOTYPE;
 }
 
-
 template <typename T>
-Intervals<T> Intervals<T>::from_array(const bp::object &src)
+Intervals<T> * Intervals<T>::from_array(const nb::object & src)
 {
-    Intervals<T> output;
-    BufferWrapper<T> buf("src", src, false, vector<int>{-1, 2});
+    Intervals<T> * output = new Intervals<T>();
 
+    BufferWrapper<T> buf("src", src, false, vector<int>{-1, 2});
     char *d = (char*)buf->buf;
-    int n_seg = buf->shape[0];
-    for (int i=0; i<n_seg; ++i) {
-        output.segments.push_back(interval_pair<T>(d, d+buf->strides[1]));
+    size_t n_seg = buf->shape[0];
+
+    std::cerr << "Intervals from_array shape = " << buf->shape[0] << "," << buf->shape[1] << std::endl;
+    std::cerr << "Intervals from_array strides = " << buf->strides[0] << "," << buf->strides[1] << std::endl;
+
+    for (size_t i = 0; i < n_seg; ++i) {
+        std::pair<T,T> pr = interval_pair<T>(d, d+(buf->strides[1]));
+        std::cerr << "Intervals from_array seg " << i << " pair = " << pr.first << "," << pr.second << std::endl;
+        output->segments.push_back(pr);
+        //output.segments.push_back(interval_pair<T>(d, d+buf->strides[1]));
         d += buf->strides[0];
+        std::cerr << "Intervals from_array segments size now " << output->segments.size() << std::endl;
     }
-    
+
     return output;
 }
 
 template <typename T>
-bp::object Intervals<T>::array() const
+nb::object Intervals<T>::array() const
 {
-    npy_intp dims[2] = {0, 2};
+    npy_intp dims[2];
+    std::cerr << "Intervals segments.size() = " << segments.size() << std::endl;
     dims[0] = (npy_intp)segments.size();
+    dims[1] = 2;
     int dtype = get_dtype<T>();
     if (dtype == NPY_NOTYPE)
         throw general_agreement_exception("array() not implemented for this domain dtype.");
-
     PyObject *v = PyArray_SimpleNew(2, dims, dtype);
+    if (v == NULL) {
+        ostringstream dstr;
+        dstr << "Failed to allocate Intervals numpy array of size (";
+        dstr << dims[0] << ", " << dims[1] << ")";
+        throw RuntimeError_exception(dstr.str().c_str());
+    }
     char *ptr = reinterpret_cast<char*>((PyArray_DATA((PyArrayObject*)v)));
     for (auto p = segments.begin(); p != segments.end(); ++p) {
         ptr += interval_extract((&*p), ptr);
     }
-    return bp::object(bp::handle<>(v));
+    return nb::steal<nb::object>(v);
 }
 
 
@@ -363,16 +368,16 @@ bp::object Intervals<T>::array() const
 template <typename intType, typename numpyType,
           typename std::enable_if<!std::is_integral<intType>::value,
                                   int>::type* = nullptr>
-static inline bp::object from_mask_(void *buf, intType count, int n_bits)
+static inline nb::object from_mask_(void *buf, intType count, int n_bits)
 {
     throw dtype_exception("target", "Interval<> over integral type.");
-    return bp::object();
+    return nb::object();
 }
 
 template <typename intType, typename numpyType,
           typename std::enable_if<std::is_integral<intType>::value,
                                   int>::type* = nullptr>
-static inline bp::object from_mask_(void *buf, intType count, int n_bits)
+static inline nb::object from_mask_(void *buf, intType count, int n_bits)
 {
     if (n_bits < 0)
         n_bits = 8*sizeof(numpyType);
@@ -409,14 +414,14 @@ static inline bp::object from_mask_(void *buf, intType count, int n_bits)
     }
 
     // Once added to the list, we can't modify further.
-    bp::list bits;
+    nb::list bits;
     for (auto i: output)
         bits.append(i);
     return bits;
 }
 
 template <typename T>
-bp::object Intervals<T>::from_mask(const bp::object &src, int n_bits)
+nb::object Intervals<T>::from_mask(const nb::object &src, int n_bits)
 {
     BufferWrapper<T> buf("src", src, false);
 
@@ -443,7 +448,7 @@ bp::object Intervals<T>::from_mask(const bp::object &src, int n_bits)
     }
 
     throw dtype_exception("src", "integer type");
-    return bp::object();
+    return nb::object();
 }
 
 
@@ -458,31 +463,40 @@ bp::object Intervals<T>::from_mask(const bp::object &src, int n_bits)
 
 template <typename intType,typename std::enable_if<!std::is_integral<intType>::value,
                                                    int>::type* = nullptr>
-static inline bp::object mask_(const bp::list &ivlist, int n_bits)
+static inline nb::object mask_(const nb::list &ivlist, int n_bits)
 {
     intType x;
     throw dtype_exception("ivlist", "Interval<> over integral type.");
-    return bp::object();
+    return nb::object();
 }
 
 template <typename intType, typename std::enable_if<std::is_integral<intType>::value,
                                                     int>::type* = nullptr>
-static inline bp::object mask_(const bp::list &ivlist, int n_bits)
+static inline nb::object mask_(const nb::list &ivlist, int n_bits)
 {
     vector<Intervals<intType>> ivals;
     vector<int> indexes;
 
     pair<intType,intType> domain;
 
-    for (long i=0; i<bp::len(ivlist); i++) {
+    for (long i=0; i<nb::len(ivlist); i++) {
+        std::cerr << "Intervals mask processing list item " << i << std::endl;
         indexes.push_back(0);
-        ivals.push_back(bp::extract<Intervals<intType>>(ivlist[i]));
+        Intervals<intType> cur = nb::cast<Intervals<intType>>(ivlist[i]);
+        std::cerr << "Intervals mask cast cur domain = " << cur.domain.first << "," << cur.domain.second << std::endl;
+
         if (i==0) {
-            domain = ivals[i].domain;
-        } else if (domain != ivals[i].domain) {
+            domain = cur.domain;
+            std::cerr << "Intervals mask set domain to " << domain.first << "," << domain.second << std::endl;
+        } else if (domain == cur.domain) {
+            std::cerr << "Intervals mask domain agrees, append" << std::endl;
+            ivals.push_back(cur);
+        } else {
+            std::cerr << "Intervals mask domain mismatch" << std::endl;
             throw agreement_exception("ivlist[0]", "all other ivlist[i]", "domain");
         }
     }
+    std::cerr << "Intervals mask done processing list" << std::endl;
 
     // Determine the output mask size based on n_bits... which may be unspecified.
     int npy_type = NPY_UINT8;
@@ -491,6 +505,7 @@ static inline bp::object mask_(const bp::list &ivlist, int n_bits)
     else if (n_bits < ivals.size())
         throw general_agreement_exception("Input list has more items than the "
                                           "output mask size (n_bits).");
+    std::cerr << "Intervals mask using n_bits = " << n_bits << std::endl;
 
     if (n_bits <= 8)
         npy_type = NPY_UINT8;
@@ -506,27 +521,42 @@ static inline bp::object mask_(const bp::list &ivlist, int n_bits)
             << " requested to encode this mask.";
         throw general_agreement_exception(err.str());
     }
+    std::cerr << "Intervals mask use npy_type = " << npy_type << std::endl;
 
     int n = domain.second - domain.first;
-    npy_intp dims[1] = {n};
+    npy_intp dims[1];
+    dims[0] = n;
+
+    std::cerr << "Intervals mask allocate 1D array of len " << n << std::endl;
     PyObject *v = PyArray_SimpleNew(1, dims, npy_type);
+    if (v == NULL) {
+        ostringstream dstr;
+        dstr << "Failed to allocate Intervals mask array of size (";
+        dstr << dims[0] << ",)";
+        throw RuntimeError_exception(dstr.str().c_str());
+    }
 
     // Assumes little-endian.
     int n_byte = PyArray_ITEMSIZE((PyArrayObject*)v);
+    std::cerr << "Intervals mask n_byte = " << n_byte << std::endl;
+
     uint8_t *ptr = reinterpret_cast<uint8_t*>((PyArray_DATA((PyArrayObject*)v)));
     memset(ptr, 0, n*n_byte);
     for (long bit=0; bit<ivals.size(); ++bit) {
         for (auto p: ivals[bit].segments) {
-            for (int i=p.first - domain.first; i<p.second - domain.first; i++)
+            for (int i=p.first - domain.first; i<p.second - domain.first; i++) {
+                std::cerr << "Intervals mask bit " << bit << ", i " << i << ": ptr[" << i*n_byte + bit/8 << "] |= " << (1<<(bit%8)) << std::endl;
                 ptr[i*n_byte + bit/8] |= (1<<(bit%8));
+            }
         }
     }
 
-    return bp::object(bp::handle<>(v));
+    std::cerr << "Intervals mask return" << std::endl;
+    return nb::steal<nb::object>(v);
 }
 
 template <typename T>
-bp::object Intervals<T>::mask(const bp::list &ivlist, int n_bits)
+nb::object Intervals<T>::mask(const nb::list &ivlist, int n_bits)
 {
     return mask_<T>(ivlist, n_bits);
 }
@@ -535,7 +565,7 @@ bp::object Intervals<T>::mask(const bp::list &ivlist, int n_bits)
 //
 // Implementation of the algebra
 //
- 
+
 template <typename T>
 Intervals<T>& Intervals<T>::intersect(const Intervals<T> &src)
 {
@@ -544,7 +574,7 @@ Intervals<T>& Intervals<T>::intersect(const Intervals<T> &src)
     *this = output.complement();
     return *this;
 }
-    
+
 template <typename T>
 void Intervals<T>::set_domain(T start, T end)
 {
@@ -579,7 +609,7 @@ Intervals<T> Intervals<T>::complement() const
 template <typename T,
           typename std::enable_if<!std::is_integral<T>::value,
                                   int>::type* = nullptr>
-static inline Intervals<T> _getitem_(Intervals<T> &src, bp::object indices)
+static inline Intervals<T> _getitem_(Intervals<T> &src, nb::object indices)
 {
     throw dtype_exception("target", "Interval<> over integral type.");
     return Intervals<T>();
@@ -588,25 +618,29 @@ static inline Intervals<T> _getitem_(Intervals<T> &src, bp::object indices)
 template <typename objType, typename T>
 static inline T extract_or_default(objType src, T default_)
 {
-    bp::extract<T> ex(src);
-    if (ex.check())
-        return ex();
-    return default_;
+    T result;
+    if (nb::try_cast<T>(src, result)) {
+        // Successful cast
+        return result;
+    } else {
+        return default_;
+    }
 }
 
 template <typename T,
           typename std::enable_if<std::is_integral<T>::value,
                                   int>::type* = nullptr>
-static inline Intervals<T> _getitem_(Intervals<T> &src, bp::object indices)
+static inline Intervals<T> _getitem_(Intervals<T> &src, nb::object indices)
 {
-    bp::extract<bp::slice> ex(indices);
-    if (ex.check()) {
+    if (nb::isinstance<nb::slice>(indices)) {
+        nb::slice sl = nb::cast<nb::slice>(indices);
+
         T count = src.domain.second - src.domain.first;
+        auto slc_par = sl.compute(count);
 
-        auto sl = ex();
-        T start = extract_or_default(sl.start(), 0);
-        T stop = extract_or_default(sl.stop(), count);
-        T step = extract_or_default(sl.step(), 1);
+        T start = slc_par.template get<0>();
+        T stop = slc_par.template get<1>();
+        T step = slc_par.template get<2>();
 
         assert(step == 1);
         if (start < 0)
@@ -638,7 +672,7 @@ static inline Intervals<T> _getitem_(Intervals<T> &src, bp::object indices)
 }
 
 template <typename T>
-Intervals<T> Intervals<T>::getitem(bp::object indices)
+Intervals<T> Intervals<T>::getitem(nb::object indices)
 {
     return _getitem_(*this, indices);
 }
@@ -695,81 +729,139 @@ Intervals<T> Intervals<T>::operator*(const Intervals<T> &src) const
     return output;
 }
 
-//
-// boost-python registration.
-//
 
-using namespace boost::python;
-
-#define EXPORT_INTERVALS(DOMAIN_TYPE, CLASSNAME) \
-    bp::class_<CLASSNAME>(#CLASSNAME,                                   \
-        "A finite series of non-overlapping semi-open intervals on a "  \
-        "domain of type: " #DOMAIN_TYPE ".")                            \
-    .def(init<const DOMAIN_TYPE&, const DOMAIN_TYPE&>("Initialize with domain.")) \
-    .def("__str__", &CLASSNAME::Description) \
-    .def("add_interval", &CLASSNAME::add_interval,                      \
-         return_internal_reference<>(),                                 \
-         args("self", "start", "end"),                                  \
-         "Merge an interval into the set.")                             \
-    .def("append_interval_no_check", &CLASSNAME::append_interval_no_check, \
-         return_internal_reference<>(),                                 \
-         args("self", "start", "end"),                                  \
-         "Append an interval to the set without checking for overlap or sequence.") \
-    .def("merge", &CLASSNAME::merge,                                    \
-         return_internal_reference<>(),                                 \
-         "Merge an Intervals into the set.")                            \
-    .def("intersect", &CLASSNAME::intersect,                            \
-         return_internal_reference<>(),                                 \
-         args("self", "source"),                                        \
-         "Intersect another " #DOMAIN_TYPE "with this one.")            \
-    .add_property(                                                      \
-        "domain",                                                       \
-        +[](const CLASSNAME& A) {                                       \
-             return make_tuple( A.domain.first, A.domain.second );      \
-         },                                                             \
-        +[](CLASSNAME& A, object _domain) {                             \
-             A.set_domain(extract<DOMAIN_TYPE>(_domain[0]),             \
-                          extract<DOMAIN_TYPE>(_domain[1]));            \
-         },                                                             \
-        "Interval set domain (settable, with consequences).")           \
-    .def("complement", &CLASSNAME::complement,                          \
-         "Return the complement (over domain).")                        \
-    .def("array", &CLASSNAME::array,                                    \
-         "Return the intervals as a 2-d numpy array.")                  \
-    .def("from_array", &CLASSNAME::from_array,                          \
-         args("input_array"),                                           \
-         "Return a " #CLASSNAME " based on an (n,2) ndarray.")          \
-    .staticmethod("from_array")                                         \
-    .def("from_mask", &CLASSNAME::from_mask,                            \
-         args("input_array", "n_bits"),                                 \
-         "Return a list of " #CLASSNAME ", extracted from the first \n" \
-         "n_bits bits of input_array (a 1-d array of integer type).")   \
-    .staticmethod("from_mask")                                          \
-    .def("mask", &CLASSNAME::mask,                                      \
-         args("intervals_list", "n_bits"),                              \
-         "Return an ndarray bitmask from a list of " #CLASSNAME ".\n"   \
-         "The dtype will be the smallest available to hold n_bits.")    \
-    .staticmethod("mask")                                               \
-    .def("copy",                                                        \
-         +[](CLASSNAME& A) {                                            \
-              return CLASSNAME(A);                                      \
-          },                                                            \
-         "Get a new object with a copy of the data.")                   \
-    .def("__getitem__", &CLASSNAME::getitem)                            \
-    .def(-self)                                                         \
-    .def(~self)                                                         \
-    .def(self += self)                                                  \
-    .def(self -= self)                                                  \
-    .def(self + self)                                                   \
-    .def(self - self)                                                   \
-    .def(self * self);
-
-
-PYBINDINGS("so3g")
-{
-    docstring_options local_docstring_options(true, true, false);
-    EXPORT_INTERVALS(double,  IntervalsDouble);
-    EXPORT_INTERVALS(int64_t, IntervalsInt);
-    EXPORT_INTERVALS(int32_t, IntervalsInt32);
-    EXPORT_INTERVALS(G3Time,  IntervalsTime);
+// Helper function to register an Intervals class for a concrete type.
+
+template <typename C>
+void intervals_bindings(nb::module_ & m, char const * name) {
+
+    nb::class_<Intervals<C>>(m, name)
+        .def(nb::init<>())
+        .def(nb::init<C, C>(),
+            R"(
+            A finite series of non-overlapping semi-open intervals
+            )"
+        )
+        .def("__str__", &Intervals<C>::Description)
+        .def("add_interval", &Intervals<C>::add_interval, nb::rv_policy::none,
+            nb::arg("start"),
+            nb::arg("end"),
+            R"(
+            Merge an interval into the set.
+            )"
+        )
+        .def("append_interval_no_check", &Intervals<C>::append_interval_no_check,
+            nb::rv_policy::none,
+            nb::arg("start"),
+            nb::arg("end"),
+            R"(
+            Append an interval to the set without checking for overlap or sequence.
+            )"
+        )
+        .def("merge", &Intervals<C>::merge, nb::rv_policy::none,
+            R"(
+            Merge an Intervals into the set.
+            )"
+        )
+        .def("intersect", &Intervals<C>::intersect, nb::rv_policy::none,
+            nb::arg("source"),
+            R"(
+            Intersect another Intervals object with this one.
+            )"
+        )
+        .def_prop_rw("domain",
+            [](Intervals<C> & slf) {
+                auto dom = slf.get_domain();
+                return nb::make_tuple(dom.first, dom.second);
+            },
+            [](Intervals<C> & slf, nb::object value) {
+                if (nb::isinstance<nb::list>(value)) {
+                    auto v = nb::cast<nb::list>(value);
+                    if (v.size() != 2) {
+                        throw shape_exception("domain", "!= 2");
+                    }
+                    slf.set_domain(nb::cast<C>(v[0]), nb::cast<C>(v[1]));
+                } else if (nb::isinstance<nb::tuple>(value)) {
+                    auto v = nb::cast<nb::tuple>(value);
+                    if (v.size() != 2) {
+                        throw shape_exception("domain", "!= 2");
+                    }
+                    slf.set_domain(nb::cast<C>(v[0]), nb::cast<C>(v[1]));
+                } else {
+                    throw general_agreement_exception(
+                        "Only list or tuple values can be used to set domain"
+                    );
+                }
+            },
+            R"(
+            Interval set domain (settable, with consequences).
+            )"
+        )
+        .def("complement", &Intervals<C>::complement, nb::rv_policy::take_ownership,
+            R"(
+            Return the complement (over domain).
+            )"
+        )
+        .def(
+            "copy",
+            [](Intervals<C> & slf) {
+                auto obj = Intervals<C>(slf);
+                std::cerr << "DBG bindings: " << obj.Description() << std::endl;
+                return obj;
+            }, nb::rv_policy::move,
+            R"(
+            Get a new object with a copy of the data.
+            )"
+        )
+        .def_static("from_array", &Intervals<C>::from_array,
+            nb::rv_policy::take_ownership,
+            nb::arg("input_array"),
+            R"(
+            Return an Intervals object based on an (n,2) ndarray.
+            )"
+        )
+        .def("array", &Intervals<C>::array, nb::rv_policy::take_ownership,
+            R"(
+            Return the intervals as a 2-d numpy array.
+            )"
+        )
+        .def("__getitem__", &Intervals<C>::getitem, nb::rv_policy::take_ownership)
+        .def_static("from_mask", &Intervals<C>::from_mask,
+            nb::rv_policy::take_ownership,
+            nb::arg("input_array"),
+            nb::arg("n_bits"),
+            R"(
+            Return a list Intervals.
+
+            The Intervals are extracted from the first n_bits of the input_array
+            (a 1-D array of integral type).
+            )"
+        )
+        .def_static("mask", &Intervals<C>::mask, nb::rv_policy::take_ownership,
+            nb::arg("intervals_list"),
+            nb::arg("n_bits"),
+            R"(
+            Return an ndarray bitmask from a list of Intervals.
+
+            The dtype will be the smallest available to hold n_bits.
+            )"
+        )
+        .def(-nb::self)
+        .def(~nb::self)
+        .def(nb::self += nb::self)
+        .def(nb::self -= nb::self)
+        .def(nb::self + nb::self)
+        .def(nb::self - nb::self)
+        .def(nb::self * nb::self);
+
+    return;
+}
+
+
+void register_intervals(nb::module_ & m) {
+    // Concrete intervals types
+    intervals_bindings<double>(m, "IntervalsDouble");
+    intervals_bindings<int64_t>(m, "IntervalsInt");
+    intervals_bindings<int32_t>(m, "IntervalsInt32");
+    return;
 }
diff --git a/src/Projection.cxx b/src/Projection.cxx
index aabf5599..568efb4a 100644
--- a/src/Projection.cxx
+++ b/src/Projection.cxx
@@ -1,30 +1,30 @@
 #define NO_IMPORT_ARRAY
 
-// debug
 #include <iostream>
-using namespace std;
-
-#include <pybindings.h>
-
 #include <assert.h>
-#include <math.h>
+#include <cmath>
+#include <cstdio>
 
 #ifdef _OPENMP
 # include <omp.h>
 #endif // ifdef _OPENMP
-#include <boost/math/quaternion.hpp>
 
-#include <container_pybindings.h>
+#include <nanobind/nanobind.h>
 
+#include "quaternion.h"
 #include "so3g_numpy.h"
 #include "Projection.h"
 #include "Ranges.h"
 #include "exceptions.h"
 #include "so_linterp.h"
 
-#include <stdio.h>
+using namespace std;
+
+namespace nb = nanobind;
+
 #include "healpix_bare.c"
 
+
 // TRIG_TABLE_SIZE
 //
 // Set this macro to enable trig interpolation tables.  Studies show
@@ -44,9 +44,9 @@ static atan2Table atan2_lookup(TRIG_TABLE_SIZE);
 #endif
 
 
-typedef boost::math::quaternion<double> quatd;
+typedef quaternion<double> quatd;
 
-inline bool isNone(const bp::object &pyo)
+inline bool isNone(const nb::object &pyo)
 {
     return (pyo.ptr() == Py_None);
 }
@@ -130,7 +130,7 @@ inline bool isNone(const bp::object &pyo)
 // implemented as follows.
 //
 // At the top level, the functions where the responsivity is relevant, like
-// from_map, to_map etc. take a bp::object reponse argument, representing
+// from_map, to_map etc. take a nb::object reponse argument, representing
 // the [ndet,2] detector responsivities.
 //
 // This is then converted into a BufferWrapper<FSIGNAL>, before each detector's
@@ -182,8 +182,8 @@ class SpinTQU : public SpinClass<3> {};
 template <typename CoordSys>
 class Pointer {
 public:
-    bool TestInputs(bp::object &map, bp::object &pbore, bp::object &pdet,
-                    bp::object &signal, bp::object &det_weights);
+    bool TestInputs(nb::object &map, nb::object &pbore, nb::object &pdet,
+                    nb::object &signal, nb::object &det_weights);
     void InitPerDet(int i_det, double *dofs);
     int DetCount() { return n_det; }
     int TimeCount() { return n_time; }
@@ -198,8 +198,8 @@ class Pointer {
 
 template <typename CoordSys>
 bool Pointer<CoordSys>::TestInputs(
-    bp::object &map, bp::object &pbore, bp::object &pdet,
-    bp::object &signal, bp::object &det_weights)
+    nb::object &map, nb::object &pbore, nb::object &pdet,
+    nb::object &signal, nb::object &det_weights)
 {
     // Boresight and Detector must present and inter-compatible.
     _pborebuf = BufferWrapper<double>("boresight", pbore, false,
@@ -531,18 +531,18 @@ class Pixelizor_Healpix<NonTiled> {
         check_nside(nside);
     };
     Pixelizor_Healpix() {};
-    Pixelizor_Healpix(bp::object args) {
-        bp::tuple args_tuple = bp::extract<bp::tuple>(args);
+    Pixelizor_Healpix(nb::object args) {
+        nb::tuple args_tuple = nb::cast<nb::tuple>(args);
         // args[0]: int nside
-        nside = bp::extract<int>(args_tuple[0])();
+        nside = nb::cast<int>(args_tuple[0]);
         // args[1]: bool isnest
-        nest = bp::extract<bool>(args_tuple[1])();
+        nest = nb::cast<bool>(args_tuple[1]);
         npix = nside2npix(nside);
         check_nside(nside);
     }
     ~Pixelizor_Healpix() {};
 
-    bp::object zeros(vector<int> shape) {
+    nb::object zeros(vector<int> shape) {
         shape.push_back(npix);
         int ndim = 0;
         npy_intp dims[32];
@@ -551,7 +551,7 @@ class Pixelizor_Healpix<NonTiled> {
 
         int dtype = NPY_FLOAT64;
         PyObject *v = PyArray_ZEROS(ndim, dims, dtype, 0);
-        return bp::object(bp::handle<>(v));
+        return nb::steal<nb::object>(v);
     }
 
     inline
@@ -573,7 +573,7 @@ class Pixelizor_Healpix<NonTiled> {
         return 1;
     }
 
-    bool TestInputs(bp::object &map, bool need_map, bool need_weight_map, int comp_count) {
+    bool TestInputs(nb::object &map, bool need_map, bool need_weight_map, int comp_count) {
         if (need_map) {
             // The map is mandatory, and the leading axis must match the
             // component count.  Then the one healpix pixel axis.
@@ -637,27 +637,27 @@ class Pixelizor_Healpix<Tiled> {
         check_nside(nside);
     };
     Pixelizor_Healpix() {};
-    Pixelizor_Healpix(bp::object args) {
+    Pixelizor_Healpix(nb::object args) {
         // args[0]: int nside
-        bp::tuple args_tuple = bp::extract<bp::tuple>(args);
-        nside = bp::extract<int>(args_tuple[0])();
+        nb::tuple args_tuple = nb::cast<nb::tuple>(args);
+        nside = nb::cast<int>(args_tuple[0]);
         // args[1]: bool nestin: MUST BE true ; check that
-        bool nestin = bp::extract<bool>(args_tuple[1])(); // "nest" argument, not used for tiled maps
+        bool nestin = nb::cast<bool>(args_tuple[1]); // "nest" argument, not used for tiled maps
         if (! nestin){
             std::ostringstream err;
             err << "RING not supported for tiled maps";
             throw ValueError_exception(err.str());
         }
         // args[2] int nside_tile; nside defining the tiling
-        int nside_tile = bp::extract<int>(args_tuple[2])();
+        int nside_tile = nb::cast<int>(args_tuple[2]);
         ntiles = nside2npix(nside_tile);
-        if (bp::len(args) >= 4) {
+        if (nb::len(args) >= 4) {
             // args[3] list(int) of indexes for active tiles
-            bp::object active_tiles = bp::extract<bp::object>(args_tuple[3])();
+            nb::object active_tiles = nb::cast<nb::object>(args_tuple[3]);
             if (! isNone(active_tiles)){
                 populate = vector<bool>(ntiles, false);
-                 for (int i=0; i < bp::len(active_tiles); i++) {
-                     int idx = PyLong_AsLong(bp::object(active_tiles[i]).ptr());
+                 for (int i=0; i < nb::len(active_tiles); i++) {
+                     int idx = PyLong_AsLong(nb::object(active_tiles[i]).ptr());
                      if (idx >= 0 && idx < ntiles)
                          populate[idx] = true;
                  }
@@ -675,7 +675,7 @@ class Pixelizor_Healpix<Tiled> {
     }
   ~Pixelizor_Healpix() {};
 
-    bp::object zeros(vector<int> shape) {
+    nb::object zeros(vector<int> shape) {
         int dtype = NPY_FLOAT64;
         int ndim = 0;
         npy_intp dims[32];
@@ -686,16 +686,16 @@ class Pixelizor_Healpix<Tiled> {
             throw RuntimeError_exception("Cannot create blank tiled map unless "
                                   "user has specified what tiles to populate.");
 
-        bp::list maps_out;
+        nb::list maps_out;
         auto pop_iter = populate.begin();
         for (int itile = 0; itile < ntiles; itile++){
             bool pop_this = (pop_iter != populate.end()) && *(pop_iter++);
             if (pop_this) {
                 dims[ndim-1] = npix_per_tile;
                 PyObject *v = PyArray_ZEROS(ndim, dims, dtype, 0);
-                maps_out.append(bp::handle<>(v));
+                maps_out.append(nb::steal<nb::object>(v));
             } else
-                maps_out.append(bp::object());
+                maps_out.append(nb::object());
         }
         return maps_out;
     }
@@ -723,7 +723,7 @@ class Pixelizor_Healpix<Tiled> {
         return 1;
     }
 
-    bool TestInputs(bp::object &map, bool need_map, bool need_weight_map, int comp_count) {
+    bool TestInputs(nb::object &map, bool need_map, bool need_weight_map, int comp_count) {
         vector<int> map_shape_req;
         if (need_map) {
             // The map is mandatory, and the leading axis must match the
@@ -737,7 +737,7 @@ class Pixelizor_Healpix<Tiled> {
         if (map_shape_req.size() == 0)
             return true;
         mapbufs.clear();
-        for (int i_tile = 0; i_tile < bp::len(map); i_tile++) {
+        for (int i_tile = 0; i_tile < nb::len(map); i_tile++) {
             if (isNone(map[i_tile])) {
                 if (populate[i_tile])
                     throw tiling_exception(i_tile, "Projector expects tile but it is missing.");
@@ -818,18 +818,18 @@ class Pixelizor2_Flat<NonTiled, Interpol> {
         crpix[1] = ix0;
     };
     Pixelizor2_Flat() : naxis{1,1} {};
-    Pixelizor2_Flat(bp::object args) {
-        bp::tuple args_tuple = bp::extract<bp::tuple>(args);
-        naxis[0] = bp::extract<int>(args_tuple[0])();
-        naxis[1] = bp::extract<int>(args_tuple[1])();
-        cdelt[0] = bp::extract<double>(args_tuple[2])();
-        cdelt[1] = bp::extract<double>(args_tuple[3])();
-        crpix[0] = bp::extract<double>(args_tuple[4])();
-        crpix[1] = bp::extract<double>(args_tuple[5])();
+    Pixelizor2_Flat(nb::object args) {
+        nb::tuple args_tuple = nb::cast<nb::tuple>(args);
+        naxis[0] = nb::cast<int>(args_tuple[0]);
+        naxis[1] = nb::cast<int>(args_tuple[1]);
+        cdelt[0] = nb::cast<double>(args_tuple[2]);
+        cdelt[1] = nb::cast<double>(args_tuple[3]);
+        crpix[0] = nb::cast<double>(args_tuple[4]);
+        crpix[1] = nb::cast<double>(args_tuple[5]);
     }
     ~Pixelizor2_Flat() {};
 
-    bp::object zeros(vector<int> shape) {
+    nb::object zeros(vector<int> shape) {
         shape.push_back(naxis[0]);
         shape.push_back(naxis[1]);
         int ndim = 0;
@@ -839,7 +839,7 @@ class Pixelizor2_Flat<NonTiled, Interpol> {
 
         int dtype = NPY_FLOAT64;
         PyObject *v = PyArray_ZEROS(ndim, dims, dtype, 0);
-        return bp::object(bp::handle<>(v));
+        return nb::steal<nb::object>(v);
     }
 
     inline
@@ -864,7 +864,7 @@ class Pixelizor2_Flat<NonTiled, Interpol> {
     inline
     int GetPixels(int i_det, int i_time, const double *coords, int pixinds[interp_count][index_count], FSIGNAL pixweights[interp_count]);
 
-    bool TestInputs(bp::object &map, bool need_map, bool need_weight_map, int comp_count) {
+    bool TestInputs(nb::object &map, bool need_map, bool need_weight_map, int comp_count) {
         if (need_map) {
             // The map is mandatory, and the leading axis must match the
             // component count.  And then 2 celestial axes.
@@ -968,25 +968,25 @@ class Pixelizor2_Flat<Tiled, Interpol> {
         tile_shape[0] = tiley;
         tile_shape[1] = tilex;
     };
-    Pixelizor2_Flat(bp::object args) {
+    Pixelizor2_Flat(nb::object args) {
         parent_pix = Pixelizor2_Flat<NonTiled>(args); // first 6...
-        bp::tuple args_tuple = bp::extract<bp::tuple>(args);
+        nb::tuple args_tuple = nb::cast<nb::tuple>(args);
 
-        tile_shape[0] = bp::extract<int>(args_tuple[6])();
-        tile_shape[1] = bp::extract<int>(args_tuple[7])();
+        tile_shape[0] = nb::cast<int>(args_tuple[6]);
+        tile_shape[1] = nb::cast<int>(args_tuple[7]);
         // Check for tile list as arg[8].
-        if (bp::len(args) >= 9) {
+        if (nb::len(args) >= 9) {
             int n_ty = (parent_pix.naxis[0] + tile_shape[0] - 1) / tile_shape[0];
             int n_tx = (parent_pix.naxis[1] + tile_shape[1] - 1) / tile_shape[1];
             populate = vector<bool>(n_ty * n_tx, false);
-            bp::object active_tiles = bp::extract<bp::object>(args_tuple[8])();
-            for (int i=0; i<bp::len(active_tiles); i++) {
+            nb::object active_tiles = nb::cast<nb::object>(args_tuple[8]);
+            for (int i=0; i<nb::len(active_tiles); i++) {
                 // We're using C API instead of boost because this:
-                //   bp::extract<int>(active_tiles[i])
+                //   nb::cast<int>(active_tiles[i])
                 // does not work on an ndarray, nor even on a list of
                 // elements extracted from an array, unless one carefully
                 // casts them to int first.
-                int idx = PyLong_AsLong(bp::object(active_tiles[i]).ptr());
+                int idx = PyLong_AsLong(nb::object(active_tiles[i]).ptr());
                 if (idx >= 0 && idx < n_tx*n_ty)
                     populate[idx] = true;
             }
@@ -994,7 +994,7 @@ class Pixelizor2_Flat<Tiled, Interpol> {
     }
     ~Pixelizor2_Flat() {};
 
-    bp::object zeros(vector<int> shape) {
+    nb::object zeros(vector<int> shape) {
         int dtype = NPY_FLOAT64;
         int ndim = 0;
         npy_intp dims[32];
@@ -1009,7 +1009,7 @@ class Pixelizor2_Flat<Tiled, Interpol> {
             throw shape_exception("zeros", "Cannot create blank tiled map unless "
                                   "user has specified what tiles to populate.");
 
-        bp::list maps_out;
+        nb::list maps_out;
         auto pop_iter = populate.begin();
         for (int i_ty = 0; i_ty < n_ty; i_ty++) {
             for (int i_tx = 0; i_tx < n_tx; i_tx++) {
@@ -1018,9 +1018,9 @@ class Pixelizor2_Flat<Tiled, Interpol> {
                     dims[ndim-2] = min(tile_shape[0], parent_pix.naxis[0] - i_ty * tile_shape[0]);
                     dims[ndim-1] = min(tile_shape[1], parent_pix.naxis[1] - i_tx * tile_shape[1]);
                     PyObject *v = PyArray_ZEROS(ndim, dims, dtype, 0);
-                    maps_out.append(bp::handle<>(v));
+                    maps_out.append(nb::steal<nb::object>(v));
                 } else
-                    maps_out.append(bp::object());
+                    maps_out.append(nb::object());
             }
         }
         return maps_out;
@@ -1052,7 +1052,7 @@ class Pixelizor2_Flat<Tiled, Interpol> {
     inline
     int GetPixels(int i_det, int i_time, const double *coords, int pixinds[interp_count][index_count], FSIGNAL pixweights[interp_count]);
 
-    bool TestInputs(bp::object &map, bool need_map, bool need_weight_map, int comp_count) {
+    bool TestInputs(nb::object &map, bool need_map, bool need_weight_map, int comp_count) {
         vector<int> map_shape_req;
         if (need_map) {
             // The map is mandatory, and the leading axis must match the
@@ -1067,7 +1067,7 @@ class Pixelizor2_Flat<Tiled, Interpol> {
             return true;
 
         mapbufs.clear();
-        for (int i_tile = 0; i_tile < bp::len(map); i_tile++) {
+        for (int i_tile = 0; i_tile < nb::len(map); i_tile++) {
             if (isNone(map[i_tile])) {
                 if (populate[i_tile])
                     throw tiling_exception(i_tile, "Projector expects tile but it is missing.");
@@ -1210,12 +1210,11 @@ void spin_proj_factors<SpinTQU>(const double* coords, const Response & response,
 
 
 template <typename DTYPE>
-bool SignalSpace<DTYPE>::_Validate(bp::object input, std::string var_name,
+bool SignalSpace<DTYPE>::_Validate(nb::object input, std::string var_name,
                                    int dtype)
 {
     // We want a list of arrays here.
-    bp::list sig_list;
-    auto list_extractor = bp::extract<bp::list>(input);
+    nb::list sig_list;
     if (isNone(input)) {
         npy_intp _dims[dims.size()];
         for (int d=0; d<dims.size(); ++d) {
@@ -1225,27 +1224,28 @@ bool SignalSpace<DTYPE>::_Validate(bp::object input, std::string var_name,
         }
         for (int i=0; i<dims[0]; ++i) {
             PyObject *v = PyArray_ZEROS(dims.size()-1, _dims+1, dtype, 0);
-            sig_list.append(bp::object(bp::handle<>(v)));
+            sig_list.append(nb::steal<nb::object>(v));
         }
-    } else if (list_extractor.check()) {
-        sig_list = list_extractor();
+    } else if (nb::try_cast<nb::list>(input, sig_list)) {
+        // input was a list and got extracted by the try_cast.
     } else {
         // Probably an array... listify it.
-        for (int i=0; i<bp::len(input); ++i)
+        for (int i=0; i<nb::len(input); ++i)
             sig_list.append(input[i]);
     }
     ret_val = sig_list;
 
     if (dims[0] == -1) {
-        dims[0] = bp::len(sig_list);
+        dims[0] = nb::len(sig_list);
         if (dims[0] == 0)
             throw shape_exception(var_name, "has not been tested on shape 0 objects");
-    } else if (bp::len(sig_list) != dims[0])
+    } else if (nb::len(sig_list) != dims[0])
         throw shape_exception(var_name, "must contain (n_det) vectors");
 
     const int n_det = dims[0];
 
     // Now extract each list member into our vector of BufferWrappers..
+    // This looks like a memory leak- where is `data_ptr` freed?
     data_ptr = (DTYPE**)calloc(n_det, sizeof(*data_ptr));
 
     bw.reserve(n_det);
@@ -1255,7 +1255,7 @@ bool SignalSpace<DTYPE>::_Validate(bp::object input, std::string var_name,
     vector<int> sub_dims(dims.begin()+1, dims.end());
 
     for (int i=0; i<n_det; i++) {
-        bp::object item = bp::extract<bp::object>(sig_list[i])();
+        nb::object item = nb::cast<nb::object>(sig_list[i]);
         bw.push_back(BufferWrapper<DTYPE>(var_name, item, false, sub_dims));
         if (i == 0) {
             sub_dims.clear();
@@ -1282,7 +1282,7 @@ bool SignalSpace<DTYPE>::_Validate(bp::object input, std::string var_name,
 
 template <typename DTYPE>
 SignalSpace<DTYPE>::SignalSpace(
-    bp::object input, std::string var_name, int dtype, int n_det, int n_time)
+    nb::object input, std::string var_name, int dtype, int n_det, int n_time)
 {
     dims = {n_det, n_time};
     _Validate(input, var_name, dtype);
@@ -1290,7 +1290,7 @@ SignalSpace<DTYPE>::SignalSpace(
 
 template <typename DTYPE>
 SignalSpace<DTYPE>::SignalSpace(
-    bp::object input, std::string var_name, int dtype, int n_det, int n_time,
+    nb::object input, std::string var_name, int dtype, int n_det, int n_time,
     int n_thirdaxis)
 {
     dims = {n_det, n_time, n_thirdaxis};
@@ -1298,7 +1298,7 @@ SignalSpace<DTYPE>::SignalSpace(
 }
 
 template<typename C, typename P, typename S>
-ProjectionEngine<C,P,S>::ProjectionEngine(bp::object pix_args)
+ProjectionEngine<C,P,S>::ProjectionEngine(nb::object pix_args)
 {
     _pixelizor = P(pix_args);
 }
@@ -1314,10 +1314,10 @@ int ProjectionEngine<C,P,S>::comp_count() const {
 }
 
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::coords(
-    bp::object pbore, bp::object pofs, bp::object coord)
+nb::object ProjectionEngine<C,P,S>::coords(
+    nb::object pbore, nb::object pofs, nb::object coord)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
     auto pointer = Pointer<C>();
     pointer.TestInputs(_none, pbore, pofs, _none, _none);
 
@@ -1349,10 +1349,10 @@ bp::object ProjectionEngine<C,P,S>::coords(
 }
 
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::pixels(
-    bp::object pbore, bp::object pofs, bp::object pixel)
+nb::object ProjectionEngine<C,P,S>::pixels(
+    nb::object pbore, nb::object pofs, nb::object pixel)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
 
     auto pointer = Pointer<C>();
     pointer.TestInputs(_none, pbore, pofs, _none, _none);
@@ -1389,10 +1389,10 @@ bp::object ProjectionEngine<C,P,S>::pixels(
 // an [ndet,ntime,{y,x,...}] array which can't handle multiple pixels per
 // sample
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::pointing_matrix(
-    bp::object pbore, bp::object pofs, bp::object response, bp::object pixel, bp::object proj)
+nb::object ProjectionEngine<C,P,S>::pointing_matrix(
+    nb::object pbore, nb::object pofs, nb::object response, nb::object pixel, nb::object proj)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
 
     auto pointer = Pointer<C>();
     pointer.TestInputs(_none, pbore, pofs, _none, _none);
@@ -1433,15 +1433,15 @@ bp::object ProjectionEngine<C,P,S>::pointing_matrix(
         }
     }
 
-    return bp::make_tuple(pixel_buf_man.ret_val,
+    return nb::make_tuple(pixel_buf_man.ret_val,
                           proj_buf_man.ret_val);
 }
 
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::pixel_ranges(
-    bp::object pbore, bp::object pofs, bp::object map, int n_domain)
+nb::object ProjectionEngine<C,P,S>::pixel_ranges(
+    nb::object pbore, nb::object pofs, nb::object map, int n_domain)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
 
     auto pointer = Pointer<C>();
     pointer.TestInputs(map, pbore, pofs, _none, _none);
@@ -1524,27 +1524,27 @@ bp::object ProjectionEngine<C,P,S>::pixel_ranges(
     }
 
     // Convert super vector to a list and return
-    auto ivals = bp::list();
+    auto ivals = nb::list();
     for (int i=0; i<ranges.size(); i++) {
-        auto bunches = bp::list();
+        auto bunches = nb::list();
         for (int j=0; j<ranges[i].size(); j++) {
-            auto domains = bp::list();
+            auto domains = nb::list();
             for (int i_det=0; i_det<n_det; i_det++) {
-                auto iv = ranges[i][j][i_det];
-                domains.append(bp::object(iv));
+                auto iv = nb::cast(ranges[i][j][i_det]);
+                domains.append(nb::object(iv));
             }
-            bunches.append(bp::extract<bp::object>(domains)());
+            bunches.append(nb::cast<nb::object>(domains));
         }
-        ivals.append(bp::extract<bp::object>(bunches)());
+        ivals.append(nb::cast<nb::object>(bunches));
     }
-    return bp::extract<bp::object>(ivals);
+    return nb::cast<nb::object>(ivals);
 }
 
 template<typename C, typename P, typename S>
 vector<int> ProjectionEngine<C,P,S>::tile_hits(
-    bp::object pbore, bp::object pofs)
+    nb::object pbore, nb::object pofs)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
 
     auto pointer = Pointer<C>();
     pointer.TestInputs(_none, pbore, pofs, _none, _none);
@@ -1604,10 +1604,10 @@ vector<int> ProjectionEngine<C,P,S>::tile_hits(
 //each thread is active only on certain tiles.  tile_map should be a
 //list of lists of tiles.
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::tile_ranges(
-    bp::object pbore, bp::object pofs, bp::object tile_lists)
+nb::object ProjectionEngine<C,P,S>::tile_ranges(
+    nb::object pbore, nb::object pofs, nb::list tile_lists)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
 
     auto pointer = Pointer<C>();
     pointer.TestInputs(_none, pbore, pofs, _none, _none);
@@ -1617,14 +1617,14 @@ bp::object ProjectionEngine<C,P,S>::tile_ranges(
     int n_tile = _pixelizor.tile_count();
     if (n_tile < 0)
         throw RuntimeError_exception("No tiles in this pixelization.");
-    int n_domain = bp::len(tile_lists);
+    int n_domain = nb::len(tile_lists);
 
     // Make a vector that maps tile into thread.
     vector<int> thread_idx(n_tile, -1);
-    for (int i=0; i<bp::len(tile_lists); i++) {
-        auto tile_list = tile_lists[i];
-        for (int j=0; j<bp::len(tile_list); j++) {
-            int tile_idx = PyLong_AsLong(bp::object(tile_list[j]).ptr());
+    for (int i=0; i<nb::len(tile_lists); i++) {
+        nb::list tile_list = tile_lists[i];
+        for (int j=0; j<nb::len(tile_list); j++) {
+            int tile_idx = PyLong_AsLong(nb::object(tile_list[j]).ptr());
             thread_idx[tile_idx] = i;
        }
     }
@@ -1688,48 +1688,49 @@ bp::object ProjectionEngine<C,P,S>::tile_ranges(
     }
 
     // Convert super vector to a list and return
-    auto ivals = bp::list();
+    auto ivals = nb::list();
     for (int i=0; i<ranges.size(); i++) {
-        auto bunches = bp::list();
+        auto bunches = nb::list();
         for (int j=0; j<ranges[i].size(); j++) {
-            auto domains = bp::list();
+            auto domains = nb::list();
             for (int i_det=0; i_det<n_det; i_det++) {
-                auto iv = ranges[i][j][i_det];
-                domains.append(bp::object(iv));
+                auto iv = nb::cast(ranges[i][j][i_det]);
+                domains.append(nb::object(iv));
             }
-            bunches.append(bp::extract<bp::object>(domains)());
+            bunches.append(nb::cast<nb::object>(domains));
         }
-        ivals.append(bp::extract<bp::object>(bunches)());
+        ivals.append(nb::cast<nb::object>(bunches));
     }
-    return bp::extract<bp::object>(ivals);
+    return nb::cast<nb::object>(ivals);
 }
 
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::zeros(bp::object shape)
+nb::object ProjectionEngine<C,P,S>::zeros(nb::object shape)
 {
     vector<int> dims;
-    bp::extract<int> int_ex(shape);
-    if (int_ex.check()) {
-        dims.push_back(int_ex());
+    int dim;
+    nb::tuple tup;
+    if (nb::try_cast<int>(shape, dim)) {
+        // scalar
+        dims.push_back(dim);
         return _pixelizor.zeros(dims);
-    }
-
-    bp::extract<bp::tuple> tuple_ex(shape);
-    if (tuple_ex.check()) {
-        auto tuple = tuple_ex();
-        for (int i=0; i<bp::len(tuple); i++)
-            dims.push_back(bp::extract<int>(tuple[i])());
+    } else if (nb::try_cast<nb::tuple>(shape, tup)) {
+        // tuple
+        for (int i = 0; i < nb::len(tup); ++i) {
+            dims.push_back(nb::cast<int>(tup[i]));
+        }
         return _pixelizor.zeros(dims);
+    } else {
+        // invalid
+        return nb::object();
     }
-
-    return bp::object();  //None on fall-through
 }
 
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::from_map(
-    bp::object map, bp::object pbore, bp::object pofs, bp::object response, bp::object signal)
+nb::object ProjectionEngine<C,P,S>::from_map(
+    nb::object map, nb::object pbore, nb::object pofs, nb::object response, nb::object signal)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
 
     // Initialize pointer and _pixelizor.
     auto pointer = Pointer<C>();
@@ -1855,7 +1856,7 @@ void to_weight_map_single_thread(Pointer<C> &pointer,
 
 static
 vector<vector<vector<RangesInt32>>> derive_ranges(
-    bp::object thread_intervals, int n_det, int n_time,
+    nb::object thread_intervals, int n_det, int n_time,
     std::string arg_name)
 {
     // The first index of the returned object should correspond to
@@ -1871,27 +1872,31 @@ vector<vector<vector<RangesInt32>>> derive_ranges(
     //   which will be looped over in serial, each containing N[M]
     //   threads-lists that will be done in parallel.
     vector<vector<vector<RangesInt32>>> ivals;
+    RangesInt32 test_ranges;
 
     if (isNone(thread_intervals)) {
         // It's None. Generate a single bunch with a single-thread covering all samples
         auto r = RangesInt32(n_time).add_interval(0, n_time);
         vector<vector<RangesInt32>> v(1, vector<RangesInt32>(n_det, r));
         ivals.push_back(v);
-    } else if(bp::extract<RangesInt32>(thread_intervals[0]).check()) {
+    } else if (nb::try_cast<RangesInt32>(thread_intervals[0], test_ranges)) {
         // It's a RangesMatrix (ndet,nranges). Promote to single thread, single bunch
         ivals.push_back(vector<vector<RangesInt32>>(1, extract_ranges<int32_t>(thread_intervals)));
-    } else if(bp::extract<RangesInt32>(thread_intervals[0][0]).check()) {
+    } else if (nb::try_cast<RangesInt32>(thread_intervals[0][0], test_ranges)) {
         // It's a per-thread RangesMatrix (nthread,ndet,nranges). Promote to single bunch
         vector<vector<RangesInt32>> bunch;
-        for (int i=0; i<bp::len(thread_intervals); i++)
-            bunch.push_back(extract_ranges<int32_t>(thread_intervals[i]));
+        nb::list leading = nb::cast<nb::list>(thread_intervals);
+        for (int i=0; i<nb::len(leading); i++)
+            bunch.push_back(extract_ranges<int32_t>(leading[i]));
         ivals.push_back(bunch);
-    } else if(bp::extract<RangesInt32>(thread_intervals[0][0][0]).check()) {
+    } else if (nb::try_cast<RangesInt32>(thread_intervals[0][0][0], test_ranges)) {
         // It's a full multi-bunch (nbunch,nthread,ndet,nranges) thing.
-        for (int i=0; i<bp::len(thread_intervals); i++) {
+        nb::list leading_i = nb::cast<nb::list>(thread_intervals);
+        for (int i=0; i<nb::len(leading_i); i++) {
             vector<vector<RangesInt32>> bunch;
-            for (int j=0; j<bp::len(thread_intervals[i]); j++)
-                bunch.push_back(extract_ranges<int32_t>(thread_intervals[i][j]));
+            nb::list leading_j = nb::cast<nb::list>(leading_i[i]);
+            for (int j=0; j<nb::len(leading_j); j++)
+                bunch.push_back(extract_ranges<int32_t>(leading_j[j]));
             ivals.push_back(bunch);
         }
     } else {
@@ -1922,9 +1927,9 @@ vector<vector<vector<RangesInt32>>> derive_ranges(
 }
 
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::to_map(
-    bp::object map, bp::object pbore, bp::object pofs, bp::object response,
-    bp::object signal, bp::object det_weights, bp::object thread_intervals)
+nb::object ProjectionEngine<C,P,S>::to_map(
+    nb::object map, nb::object pbore, nb::object pofs, nb::object response,
+    nb::object signal, nb::object det_weights, nb::object thread_intervals)
 {
     //Initialize it / check inputs.
     auto pointer = Pointer<C>();
@@ -1966,11 +1971,11 @@ bp::object ProjectionEngine<C,P,S>::to_map(
 }
 
 template<typename C, typename P, typename S>
-bp::object ProjectionEngine<C,P,S>::to_weight_map(
-    bp::object map, bp::object pbore, bp::object pofs, bp::object response,
-    bp::object det_weights, bp::object thread_intervals)
+nb::object ProjectionEngine<C,P,S>::to_weight_map(
+    nb::object map, nb::object pbore, nb::object pofs, nb::object response,
+    nb::object det_weights, nb::object thread_intervals)
 {
-    auto _none = bp::object();
+    auto _none = nb::object();
 
     //Initialize it / check inputs.
     auto pointer = Pointer<C>();
@@ -2024,19 +2029,19 @@ template<typename TilingSys>
 class ProjEng_Precomp {
 public:
     ProjEng_Precomp() {};
-    bp::object from_map(bp::object map, bp::object pixel_index, bp::object spin_proj,
-                        bp::object signal);
-    bp::object to_map(bp::object map, bp::object pixel_index, bp::object spin_proj,
-                      bp::object signal, bp::object weights, bp::object thread_intervals);
-    bp::object to_weight_map(bp::object map, bp::object pixel_index, bp::object spin_proj,
-                             bp::object weights, bp::object thread_intervals);
+    nb::object from_map(nb::object map, nb::object pixel_index, nb::object spin_proj,
+                        nb::object signal);
+    nb::object to_map(nb::object map, nb::object pixel_index, nb::object spin_proj,
+                      nb::object signal, nb::object weights, nb::object thread_intervals);
+    nb::object to_weight_map(nb::object map, nb::object pixel_index, nb::object spin_proj,
+                             nb::object weights, nb::object thread_intervals);
 };
 
 
 template<typename TilingSys>
-bp::object ProjEng_Precomp<TilingSys>::from_map(
-    bp::object map, bp::object pixel_index, bp::object spin_proj,
-    bp::object signal)
+nb::object ProjEng_Precomp<TilingSys>::from_map(
+    nb::object map, nb::object pixel_index, nb::object spin_proj,
+    nb::object signal)
 {
     // You won't get far without pixel_index, so use that to nail down
     // the n_time and n_det.
@@ -2155,9 +2160,9 @@ void precomp_to_weight_map_single_thread(Pixelizor2_Flat<TilingSys> &tiling,
 }
 
 template<typename TilingSys>
-bp::object ProjEng_Precomp<TilingSys>::to_map(
-    bp::object map, bp::object pixel_index, bp::object spin_proj,
-    bp::object signal, bp::object det_weights, bp::object thread_intervals)
+nb::object ProjEng_Precomp<TilingSys>::to_map(
+    nb::object map, nb::object pixel_index, nb::object spin_proj,
+    nb::object signal, nb::object det_weights, nb::object thread_intervals)
 {
     // You won't get far without pixel_index, so use that to nail down
     // the n_time and n_det.
@@ -2206,9 +2211,9 @@ bp::object ProjEng_Precomp<TilingSys>::to_map(
 }
 
 template<typename TilingSys>
-bp::object ProjEng_Precomp<TilingSys>::to_weight_map(
-    bp::object map, bp::object pixel_index, bp::object spin_proj,
-    bp::object det_weights, bp::object thread_intervals)
+nb::object ProjEng_Precomp<TilingSys>::to_weight_map(
+    nb::object map, nb::object pixel_index, nb::object spin_proj,
+    nb::object det_weights, nb::object thread_intervals)
 {
     // You won't get far without pixel_index, so use that to nail down
     // the n_time and n_det.
@@ -2292,9 +2297,10 @@ TYPEDEF_PIX(ZEA)
 #define STRINGIFY(X) #X
 
 #define EXPORT_ENGINE(CLASSNAME)                                        \
-    bp::class_<CLASSNAME>(STRINGIFY(CLASSNAME), bp::init<bp::object>()) \
-    .add_property("index_count", &CLASSNAME::index_count)               \
-    .add_property("comp_count", &CLASSNAME::comp_count)                 \
+    nb::class_<CLASSNAME>(m, STRINGIFY(CLASSNAME))                      \
+    .def(nb::init<nb::object>())                                        \
+    .def_prop_ro("index_count", &CLASSNAME::index_count)               \
+    .def_prop_ro("comp_count", &CLASSNAME::comp_count)                 \
     .def("coords", &CLASSNAME::coords)                                  \
     .def("pixels", &CLASSNAME::pixels)                                  \
     .def("tile_hits", &CLASSNAME::tile_hits)                            \
@@ -2322,7 +2328,7 @@ TYPEDEF_PIX(ZEA)
     EXPORT_SPIN(PIX, TQU)
 
 #define EXPORT_PRECOMP(CLASSNAME)                                       \
-    bp::class_<CLASSNAME>(#CLASSNAME)                                   \
+    nb::class_<CLASSNAME>(m, #CLASSNAME)                                   \
     .def("from_map", &CLASSNAME::from_map)                              \
     .def("to_map", &CLASSNAME::to_map)                                  \
     .def("to_weight_map", &CLASSNAME::to_weight_map)                    \
@@ -2334,8 +2340,8 @@ template<typename T>
 inline
 int _index_count(const T &) { return T::index_count; }
 
-PYBINDINGS("so3g")
-{
+
+void register_projection(nb::module_ & m) {
     EXPORT_PIX(Flat);
     EXPORT_PIX(Quat);
     EXPORT_PIX(CAR);
@@ -2355,4 +2361,5 @@ PYBINDINGS("so3g")
     EXPORT_ENGINE(ProjEng_HP_QU_Tiled);
     EXPORT_ENGINE(ProjEng_HP_TQU_Tiled);
 
+    return;
 }
diff --git a/src/Ranges.cxx b/src/Ranges.cxx
index 1cfb7441..a60083ec 100644
--- a/src/Ranges.cxx
+++ b/src/Ranges.cxx
@@ -1,12 +1,13 @@
 #define NO_IMPORT_ARRAY
 
-#include <pybindings.h>
-
 #include <iostream>
 #include <limits>
 #include <type_traits>
 
-#include <boost/python.hpp>
+#include <nanobind/nanobind.h>
+#include <nanobind/operators.h>
+#include <nanobind/stl/tuple.h>
+#include <nanobind/stl/string.h>
 
 #include "so3g_numpy.h"
 
@@ -14,6 +15,8 @@
 #include "exceptions.h"
 #include <exception>
 
+namespace nb = nanobind;
+
 
 template <typename T>
 std::string Ranges<T>::Description() const
@@ -61,7 +64,7 @@ void Ranges<T>::cleanup()
 
 template <typename T>
 Ranges<T>& Ranges<T>::_add_interval_numpysafe(
-    const bp::object start_obj, const bp::object end_obj)
+    const nb::object start_obj, const nb::object end_obj)
 {
     int start = numpysafe_extract_int(start_obj, "start");
     int end = numpysafe_extract_int(end_obj, "end");
@@ -167,7 +170,7 @@ Ranges<T>& Ranges<T>::close_gaps(const T gap)
             p++;
         }
     }
-    
+
     return *this;
 }
 
@@ -241,32 +244,32 @@ static int format_to_dtype(const BufferWrapper<T> &view)
         case 8:
             return NPY_UINT64;
         }
-    } 
+    }
 
     return NPY_NOTYPE;
 }
 
 
 template <typename T>
-Ranges<T> Ranges<T>::from_array(const bp::object &src, const bp::object &count)
+Ranges<T> * Ranges<T>::from_array(const nb::object &src, const nb::object &count)
 {
-    Ranges<T> output;
+    Ranges<T> * output = new Ranges<T>();
     BufferWrapper<T> buf("src", src, false, vector<int>{-1, 2});
 
     char *d = (char*)buf->buf;
     int n_seg = buf->shape[0];
     for (int i=0; i<n_seg; ++i) {
-        output.segments.push_back(interval_pair<T>(d, d+buf->strides[1]));
+        output->segments.push_back(interval_pair<T>(d, d+buf->strides[1]));
         d += buf->strides[0];
     }
-    output.count = numpysafe_extract_int(count, "count");
+    output->count = numpysafe_extract_int(count, "count");
 
-    output.cleanup();
+    output->cleanup();
     return output;
 }
 
 template <typename T>
-bp::object Ranges<T>::ranges() const
+nb::object Ranges<T>::ranges() const
 {
     npy_intp dims[2] = {0, 2};
     dims[0] = (npy_intp)segments.size();
@@ -275,18 +278,24 @@ bp::object Ranges<T>::ranges() const
         throw general_agreement_exception("ranges() not implemented for this domain dtype.");
 
     PyObject *v = PyArray_SimpleNew(2, dims, dtype);
+    if (v == NULL) {
+        ostringstream dstr;
+        dstr << "Failed to allocate Ranges numpy array of size (";
+        dstr << dims[0] << ", " << dims[1] << ")";
+        throw RuntimeError_exception(dstr.str().c_str());
+    }
     char *ptr = reinterpret_cast<char*>((PyArray_DATA((PyArrayObject*)v)));
     for (auto p = segments.begin(); p != segments.end(); ++p) {
         ptr += interval_extract((&*p), ptr);
     }
-    return bp::object(bp::handle<>(v));
+    return nb::steal<nb::object>(v);
 }
 
 
-// 
+//
 // Bit-mask conversion - convert between list<RangesInt> and
 // ndarray bit-masks.
-// 
+//
 // intType is the type of the Interval, which should be a simple
 // signed integer type (e.g. int64_t).  numpyType is a simple unsigned
 // type carried in the ndarray (e.g. uint8_t).  The n_bits argument is
@@ -295,7 +304,7 @@ bp::object Ranges<T>::ranges() const
 // uint8_t).
 
 template <typename intType, typename numpyType>
-static inline bp::object from_bitmask_(void *buf, intType count, int n_bits)
+static inline nb::object from_bitmask_(void *buf, intType count, int n_bits)
 {
     bool return_singleton = (n_bits == 0);
 
@@ -336,28 +345,28 @@ static inline bp::object from_bitmask_(void *buf, intType count, int n_bits)
     }
 
     if (return_singleton)
-        return bp::object(output[0]);
+        return nb::cast(output[0]);
 
     // Once added to the list, we can't modify further.
-    bp::list bits;
+    nb::list bits;
     for (auto i: output)
         bits.append(i);
     return bits;
 }
 
 template <typename T>
-bp::object Ranges<T>::from_bitmask(const bp::object &src, int n_bits)
+nb::object Ranges<T>::from_bitmask(const nb::object &src, int n_bits)
 {
     // Buffer protocol doesn't work directly on bool arrays, so if
     // what we've been passed is definitely a bool array, get a view
     // of it as a uint8 array and work with that.  (Wrap it with
-    // bp::object so references are counted properly.)
-    bp::object target(src);
+    // nb::object so references are counted properly.)
+    nb::object target(src);
     PyObject* obj_ptr = target.ptr();
     if (PyArray_Check(obj_ptr))
         if (PyArray_ISBOOL((PyArrayObject *)obj_ptr)) {
             obj_ptr = PyArray_Cast((PyArrayObject *)obj_ptr, NPY_UINT8);
-            target = bp::object(bp::handle<>(obj_ptr));
+            target = nb::steal<nb::object>(obj_ptr);
         }
 
     BufferWrapper<T> buf("src", target, false);
@@ -386,11 +395,11 @@ bp::object Ranges<T>::from_bitmask(const bp::object &src, int n_bits)
     }
 
     throw dtype_exception("src", "integer type");
-    return bp::object();
+    return nb::object();
 }
 
 template <typename T>
-bp::object Ranges<T>::from_mask(const bp::object &src)
+nb::object Ranges<T>::from_mask(const nb::object &src)
 {
     return from_bitmask(src, 0);
 }
@@ -406,20 +415,20 @@ bp::object Ranges<T>::from_mask(const bp::object &src)
 
 template <typename intType,typename std::enable_if<!std::is_integral<intType>::value,
                                                    int>::type* = nullptr>
-static inline bp::object mask_(vector<Ranges<intType>> ivals, int n_bits)
+static inline nb::object mask_(vector<Ranges<intType>> ivals, int n_bits)
 {
     intType x;
     throw dtype_exception("ivlist", "Interval<> over integral type.");
-    return bp::object();
+    return nb::object();
 }
 
 template <typename intType, typename std::enable_if<std::is_integral<intType>::value,
                                                     int>::type* = nullptr>
-static inline bp::object mask_(vector<Ranges<intType>> ivals, int n_bits)
+static inline nb::object mask_(vector<Ranges<intType>> ivals, int n_bits)
 {
     vector<int> indexes;
     int count = 0;
-    
+
     for (long i=0; i<ivals.size(); i++) {
         indexes.push_back(0);
         if (i==0) {
@@ -458,6 +467,12 @@ static inline bp::object mask_(vector<Ranges<intType>> ivals, int n_bits)
 
     npy_intp dims[1] = {count};
     PyObject *v = PyArray_SimpleNew(1, dims, npy_type);
+    if (v == NULL) {
+        ostringstream dstr;
+        dstr << "Failed to allocate Ranges mask array of size (";
+        dstr << dims[0] << ",)";
+        throw RuntimeError_exception(dstr.str().c_str());
+    }
 
     // Assumes little-endian.
     int n_byte = PyArray_ITEMSIZE((PyArrayObject*)v);
@@ -470,27 +485,27 @@ static inline bp::object mask_(vector<Ranges<intType>> ivals, int n_bits)
         }
     }
 
-    return bp::object(bp::handle<>(v));
+    return nb::steal<nb::object>(v);
 }
 
 template <typename intType>
-static inline bp::object mask_(const bp::list &ivlist, int n_bits)
+static inline nb::object mask_(const nb::list &ivlist, int n_bits)
 {
     vector<Ranges<intType>> ivals;
-    for (long i=0; i<bp::len(ivlist); i++)
-        ivals.push_back(bp::extract<Ranges<intType>>(ivlist[i]));
+    for (long i=0; i<nb::len(ivlist); i++)
+        ivals.push_back(nb::cast<Ranges<intType>>(ivlist[i]));
     return mask_(ivals, n_bits);
 }
 
 
 template <typename T>
-bp::object Ranges<T>::bitmask(const bp::list &ivlist, int n_bits)
+nb::object Ranges<T>::bitmask(const nb::list &ivlist, int n_bits)
 {
     return mask_<T>(ivlist, n_bits);
 }
 
 template <typename T>
-bp::object Ranges<T>::mask()
+nb::object Ranges<T>::mask()
 {
     vector<Ranges<T>> temp;
     temp.push_back(*this);
@@ -502,14 +517,14 @@ bp::object Ranges<T>::mask()
 //
 // Implementation of the algebra
 //
- 
+
 template <typename T>
 Ranges<T>& Ranges<T>::intersect(const Ranges<T> &src)
 {
     *this = (this->complement() + src.complement()).complement();
     return *this;
 }
-    
+
 template <typename T>
 Ranges<T> Ranges<T>::complement() const
 {
@@ -531,7 +546,7 @@ Ranges<T> Ranges<T>::zeros_like() const
 {
     Ranges<T> output(count, reference);
     return output;
-    
+
 }
 
 //make "full" range to match this range
@@ -541,7 +556,7 @@ Ranges<T> Ranges<T>::ones_like() const
     Ranges<T> output(count, reference);
     output.add_interval(0, count);
     return output;
-    
+
 }
 
 
@@ -549,7 +564,7 @@ Ranges<T> Ranges<T>::ones_like() const
 template <typename T,
           typename std::enable_if<!std::is_integral<T>::value,
                                   int>::type* = nullptr>
-static inline Ranges<T> _getitem_(Ranges<T> &src, bp::object indices)
+static inline Ranges<T> _getitem_(Ranges<T> &src, nb::object indices)
 {
     throw dtype_exception("target", "Interval<> over integral type.");
     return Ranges<T>();
@@ -558,73 +573,73 @@ static inline Ranges<T> _getitem_(Ranges<T> &src, bp::object indices)
 template <typename objType, typename T>
 static inline T extract_or_default(objType src, T default_)
 {
-    bp::extract<T> ex(src);
-    if (ex.check())
-        return ex();
-    return default_;
+    T result;
+    if (nb::try_cast<T>(src, result)) {
+        // Successful cast
+        return result;
+    } else {
+        return default_;
+    }
 }
 
 template <typename T,
           typename std::enable_if<std::is_integral<T>::value,
                                   int>::type* = nullptr>
-static inline Ranges<T> _getitem_(Ranges<T> &src, bp::object indices)
+static inline Ranges<T> _getitem_(Ranges<T> &src, nb::object indices)
 {
-    bp::object target = indices;
+    nb::object target = indices;
 
     // Allow user to pass in a length-one tuple of slices.
-    bp::extract<bp::tuple> tu_ex(target);
-    if (tu_ex.check()) {
-        if (bp::len(tu_ex()) == 0)
-            target = bp::slice();
-        else {
-            assert(bp::len(tu_ex()) == 1);
-            target = tu_ex()[0];
+    nb::slice slc;
+    if (nb::isinstance<nb::tuple>(indices)) {
+        nb::tuple temp = nb::cast<nb::tuple>(indices);
+        if (nb::len(temp) != 1) {
+            throw general_agreement_exception("Ranges __getitem__ got tuple of len >1");
         }
+        slc = nb::cast<nb::slice>(temp[0]);
+    } else if (nb::isinstance<nb::slice>(indices)) {
+        slc = nb::cast<nb::slice>(indices);
+    } else {
+        return Ranges<T>();
     }
 
-    bp::extract<bp::slice> ex(target);
-    if (ex.check()) {
-        T n = src.count;
-
-        auto sl = ex();
-        T start = extract_or_default(sl.start(), 0);
-        T stop = extract_or_default(sl.stop(), n);
-        T step = extract_or_default(sl.step(), 1);
-
-        assert(step == 1);
-        if (start < 0)
-            start = n + start;
-        if (stop < 0)
-            stop = n + stop;
-        if (stop < start)
-            stop = start;
-        if (start > n)
-            start = n;
-        if (stop > n)
-            stop = n;
-
-        auto output = Ranges<T>(stop - start, src.reference - start);
-        for (auto p: src.segments)
-            if (p.second > start && p.first < stop)
-                output.segments.push_back(make_pair(p.first - start, p.second - start));
-        output.cleanup();
-
-        return output;
-    }
-    return Ranges<T>();
+    T n = src.count;
+    auto slc_par = slc.compute(n);
+    T start = slc_par.template get<0>();
+    T stop = slc_par.template get<1>();
+    T step = slc_par.template get<2>();
+
+    assert(step == 1);
+    if (start < 0)
+        start = n + start;
+    if (stop < 0)
+        stop = n + stop;
+    if (stop < start)
+        stop = start;
+    if (start > n)
+        start = n;
+    if (stop > n)
+        stop = n;
+
+    auto output = Ranges<T>(stop - start, src.reference - start);
+    for (auto p: src.segments)
+        if (p.second > start && p.first < stop)
+            output.segments.push_back(make_pair(p.first - start, p.second - start));
+    output.cleanup();
+
+    return output;
 }
 
 template <typename T>
-Ranges<T> Ranges<T>::getitem(bp::object indices)
+Ranges<T> Ranges<T>::getitem(nb::object indices)
 {
     return _getitem_(*this, indices);
 }
 
 template <typename T>
-bp::object Ranges<T>::shape()
+nb::object Ranges<T>::shape()
 {
-    vector<T> temp = {count};
-    return bp::tuple(temp);
+    return nb::make_tuple(count);
 }
 
 template <typename T>
@@ -674,118 +689,185 @@ Ranges<T> Ranges<T>::operator*(const Ranges<T> &src) const
 }
 
 
-//
-// boost-python registration.
-//
-
-using namespace boost::python;
-
-
-#define EXPORT_RANGES(DOMAIN_TYPE, CLASSNAME)                           \
-    bp::class_<CLASSNAME>(#CLASSNAME,                                   \
-    "A finite series of non-overlapping semi-open intervals on a domain\n" \
-    "of type: " #DOMAIN_TYPE ".\n\n"                                    \
-    "To create an empty object, instantiate with just a sample count:\n" \
-    "``" #CLASSNAME "(count)``.\n"                                      \
-    "\n"                                                                \
-    "Alternately, consider convenience methods such as ``from_mask``,\n" \
-    "``from_array``, and ``from_bitmask``; see below.\n"                \
-    "\n"                                                                \
-    "In addition to the methods explained below, note the that following\n" \
-    "operators have been defined and perform as follows (where ``r1`` and\n" \
-    "``r2`` are objects of this class:\n"                               \
-    "\n"                                                                \
-    "- ``~r1`` is equivalent to ``r1.complement()``\n"                  \
-    "- ``r1 *= r2`` is equivalent to ``r1.intersect(r2)``\n"            \
-    "- ``r1 += r2`` is equivalent to ``r1.merge(r2)``\n"                \
-    "- ``r1 * r2`` and ``r1 + r2`` behave as you might expect, returning a\n" \
-    "  new object and leaving ``r1`` and ``r2`` unmodified.\n"          \
-    "\n"                                                                \
-    "The object also supports slicing.  For example, if ``r1`` has\n"   \
-    "count = 100 then r1[10:-5] returns a new object (not a view)\n"    \
-    "that has count = 85.  A data member ``reference`` keeps track\n"   \
-    "of the history of shifts in the first sample; for example if\n"    \
-    "r1.reference = 0 then r1[10:-5].reference will be -10.  This\n"    \
-    "variable can be interpreted as giving the logical index, in\n"     \
-    "the new index system, of where index=0 of the original object\n"   \
-    "would be found.  This is useful for bookkeeping in some cases.\n") \
-    .def(init<const DOMAIN_TYPE&>("Initialize with count."))            \
-    .def(init<const DOMAIN_TYPE&, const DOMAIN_TYPE&>("Initialize with count and reference.")) \
-    .def("__str__", &CLASSNAME::Description) \
-    .add_property("count", &CLASSNAME::count, &CLASSNAME::safe_set_count) \
-    .add_property("reference", &CLASSNAME::reference)                   \
-    .def("add_interval", &CLASSNAME::_add_interval_numpysafe,           \
-         return_internal_reference<>(),                                 \
-         args("self", "start", "end"),                                  \
-         "Merge an interval into the set.")                             \
-    .def("append_interval_no_check", &CLASSNAME::append_interval_no_check, \
-         return_internal_reference<>(),                                 \
-         args("self", "start", "end"),                                  \
-         "Append an interval to the set without checking for overlap or sequence.") \
-    .def("merge", &CLASSNAME::merge,                                    \
-         return_internal_reference<>(),                                 \
-         args("self", "src"),                                           \
-         "Merge ranges from another " #CLASSNAME " into this one.")     \
-    .def("buffer", &CLASSNAME::buffer,                                  \
-        return_internal_reference<>(),                                  \
-        args("self", "buff"),                                           \
-        "Buffer each interval by an amount specified by buff")          \
-    .def("buffered", &CLASSNAME::buffered,                              \
-        args("self", "buff"),                                           \
-        "Return an interval buffered by buff")                          \
-    .def("close_gaps", &CLASSNAME::close_gaps,                          \
-        return_internal_reference<>(),                                  \
-        args("self", "gap"),                                            \
-        "Remove gaps between ranges less than gap")                     \
-    .def("intersect", &CLASSNAME::intersect,                            \
-         return_internal_reference<>(),                                 \
-         args("self", "src"),                                           \
-         "Intersect another " #CLASSNAME " with this one.")             \
-    .def("complement", &CLASSNAME::complement,                          \
-         "Return the complement (over domain).")                        \
-    .def("zeros_like", &CLASSNAME::zeros_like,                          \
-         "Return range of same length but no intervals")                \
-    .def("ones_like", &CLASSNAME::ones_like,                            \
-         "Return range of same length and interval spanning count")     \
-    .def("ranges", &CLASSNAME::ranges,                                  \
-         "Return the intervals as a 2-d numpy array of ranges.")        \
-    .def("from_array", &CLASSNAME::from_array,                          \
-         args("data", "count"),                                         \
-         "The input data must be an (n,2) shape ndarray of int32. "     \
-         "The integer count sets the domain of the object.")            \
-    .staticmethod("from_array")                                         \
-    .def("from_bitmask", &CLASSNAME::from_mask,                         \
-         args("bitmask_array"),                                         \
-         "Return a list of " #CLASSNAME " extracted from an ndarray encoding a bitmask.") \
-    .staticmethod("from_bitmask")                                       \
-    .def("from_mask", &CLASSNAME::from_mask,                            \
-         args("bool_array"),                                            \
-         "Return a list of " #CLASSNAME " extracted from an ndarray of bool.") \
-    .staticmethod("from_mask")                                          \
-    .def("bitmask", &CLASSNAME::bitmask,                                \
-         args("ranges_list", "n_bits"),                                 \
-         "Return an ndarray bitmask from a list of" #CLASSNAME ".\n"    \
-         "n_bits determines the output integer type.  Bits are assigned from \n" \
-         "LSB onwards; use None in the list to skip a bit.")            \
-    .staticmethod("bitmask")                                            \
-    .def("mask", &CLASSNAME::mask,                                      \
-         "Return a boolean mask from this Ranges object.")              \
-    .def("copy",                                                        \
-         +[](CLASSNAME& A) {                                            \
-             return CLASSNAME(A);                                       \
-         },                                                             \
-         "Get a new object with a copy of the data.")                   \
-    .def("__getitem__", &CLASSNAME::getitem)                            \
-    .add_property("shape", &CLASSNAME::shape)                           \
-    .def(~self)                                                         \
-    .def(self += self)                                                  \
-    .def(self *= self)                                                  \
-    .def(self + self)                                                   \
-    .def(self * self);
-
-
-PYBINDINGS("so3g")
-{
-    docstring_options local_docstring_options(true, true, false);
-    EXPORT_RANGES(int32_t, RangesInt32);
+// Helper function to register an Ranges class for a concrete type.
+
+template <typename C>
+void ranges_bindings(nb::module_ & m, char const * name) {
+
+    nb::class_<Ranges<C>>(m, name,
+            R"(
+            A finite series of non-overlapping semi-open intervals on a domain.
+
+            To create an empty object, instantiate with just a sample count.
+            Alternately, consider convenience methods such as ``from_mask``,
+            ``from_array``, and ``from_bitmask``; see below.
+
+            In addition to the methods explained below, note the that following
+            operators have been defined and perform as follows (where ``r1`` and
+            ``r2`` are objects of this class:
+
+            - ``~r1`` is equivalent to ``r1.complement()``
+            - ``r1 *= r2`` is equivalent to ``r1.intersect(r2)``
+            - ``r1 += r2`` is equivalent to ``r1.merge(r2)``
+            - ``r1 * r2`` and ``r1 + r2`` behave as you might expect, returning a
+                new object and leaving ``r1`` and ``r2`` unmodified.
+
+            The object also supports slicing.  For example, if ``r1`` has
+            count = 100 then r1[10:-5] returns a new object (not a view)
+            that has count = 85.  A data member ``reference`` keeps track
+            of the history of shifts in the first sample; for example if
+            r1.reference = 0 then r1[10:-5].reference will be -10.  This
+            variable can be interpreted as giving the logical index, in
+            the new index system, of where index=0 of the original object
+            would be found.  This is useful for bookkeeping in some cases.
+            )"
+        )
+        .def(nb::init<>())
+        .def(nb::init<C>(),
+            R"(
+            Initialize with count.
+            )"
+        )
+        .def(nb::init<C, C>(),
+            R"(
+            Initialize with count and reference.
+            )"
+        )
+        .def("__str__", &Ranges<C>::Description)
+        .def_prop_rw("count",
+            [](Ranges<C> & slf){
+                return slf.count;
+            },
+            &Ranges<C>::safe_set_count
+        )
+        .def_prop_ro("reference",
+            [](Ranges<C> & slf){
+                return slf.reference;
+            }
+        )
+        .def_prop_ro("shape", &Ranges<C>::shape)
+        .def("add_interval", &Ranges<C>::add_interval, nb::rv_policy::none,
+            nb::arg("start"),
+            nb::arg("end"),
+            R"(
+            Merge an interval into the set.
+            )"
+        )
+        .def("append_interval_no_check", &Ranges<C>::append_interval_no_check,
+            nb::rv_policy::none,
+            nb::arg("start"),
+            nb::arg("end"),
+            R"(
+            Append an interval to the set without checking for overlap or sequence.
+            )"
+        )
+        .def("merge", &Ranges<C>::merge, nb::rv_policy::none,
+            R"(
+            Merge ranges from another object into this one.
+            )"
+        )
+        .def("intersect", &Ranges<C>::intersect, nb::rv_policy::none,
+            nb::arg("source"),
+            R"(
+            Intersect another Ranges object with this one.
+            )"
+        )
+        .def("complement", &Ranges<C>::complement, nb::rv_policy::take_ownership,
+            R"(
+            Return the complement (over domain).
+            )"
+        )
+        .def(
+            "copy",
+            [](Ranges<C> & slf) {return Ranges<C>(slf);},
+            nb::rv_policy::take_ownership,
+            R"(
+            Get a new object with a copy of the data.
+            )"
+        )
+        .def("buffer", &Ranges<C>::buffer, nb::rv_policy::none,
+            nb::arg("buff"),
+            R"(
+            Buffer each interval by an amount specified by buff
+            )"
+        )
+        .def("buffered", &Ranges<C>::buffered, nb::rv_policy::take_ownership,
+            nb::arg("buff"),
+            R"(
+            Return an interval buffered by buff
+            )"
+        )
+        .def("close_gaps", &Ranges<C>::close_gaps, nb::rv_policy::none,
+            nb::arg("gap"),
+            R"(
+            Remove gaps between ranges less than gap
+            )"
+        )
+        .def("zeros_like", &Ranges<C>::zeros_like, nb::rv_policy::take_ownership,
+            R"(
+            Return range of same length but no intervals
+            )"
+        )
+        .def("ones_like", &Ranges<C>::ones_like, nb::rv_policy::take_ownership,
+            R"(
+            Return range of same length and interval spanning count
+            )"
+        )
+        .def("ranges", &Ranges<C>::ranges, nb::rv_policy::take_ownership,
+            R"(
+            Return the intervals as a 2-d numpy array of ranges.
+            )"
+        )
+        .def_static("from_array", &Ranges<C>::from_array,
+            nb::rv_policy::take_ownership,
+            nb::arg("src"),
+            nb::arg("count"),
+            R"(
+            The input data must be an (n,2) shape ndarray of int32.
+            The integer count sets the domain of the object.
+            )"
+        )
+        .def("__getitem__", &Ranges<C>::getitem, nb::rv_policy::take_ownership)
+        .def_static("from_bitmask", &Ranges<C>::from_mask,
+            nb::rv_policy::take_ownership,
+            nb::arg("bitmask_array"),
+            R"(
+            Return a list of Ranges extracted from an ndarray encoding a bitmask.
+            )"
+        )
+        .def_static("from_mask", &Ranges<C>::from_mask,
+            nb::rv_policy::take_ownership,
+            nb::arg("bool_array"),
+            R"(
+            Return a list of Ranges extracted from an ndarray of bool.
+            )"
+        )
+        .def_static("bitmask", &Ranges<C>::bitmask, nb::rv_policy::take_ownership,
+            nb::arg("ranges_list"),
+            nb::arg("n_bits"),
+            R"(
+            Return an ndarray bitmask from a list of Ranges.  n_bits determines
+            the output integer type.  Bits are assigned from LSB onwards; use None
+            in the list to skip a bit.
+            )"
+        )
+        .def("mask", &Ranges<C>::mask, nb::rv_policy::take_ownership,
+            R"(
+            Return a boolean mask from this Ranges object.
+            )"
+        )
+        .def(~nb::self)
+        .def(nb::self += nb::self)
+        .def(nb::self *= nb::self)
+        .def(nb::self + nb::self)
+        .def(nb::self * nb::self);
+
+    return;
+}
+
+
+void register_ranges(nb::module_ & m) {
+    // Concrete Ranges types
+    ranges_bindings<int32_t>(m, "RangesInt32");
+    return;
 }
diff --git a/src/array_ops.cxx b/src/array_ops.cxx
index 66bc53cd..99daa532 100644
--- a/src/array_ops.cxx
+++ b/src/array_ops.cxx
@@ -15,27 +15,30 @@ extern "C" {
                 double* b, int* ldb, int* info );
 }
 
-#include <boost/python.hpp>
 #ifdef _OPENMP
 # include <omp.h>
 #endif // ifdef _OPENMP
 
+#include <nanobind/nanobind.h>
+
 #include <gsl/gsl_spline.h>
 #include <gsl/gsl_statistics.h>
 
-#include <pybindings.h>
 #include "so3g_numpy.h"
 #include "numpy_assist.h"
 #include "Ranges.h"
 #include "array_ops.h"
 
+namespace nb = nanobind;
+
+
 // TODO: Generalize to double precision too.
 // This implements Jon's noise model for ACT. It takes in
 // * ft[ndet,nfreq]        the fourier transform of the time-ordered data
 // * bins[nbin,{from,to}]  the start and end of each bin
 // * iD[nbin,ndet]         the inverse uncorrelated variance for each detector per bin
 // * iV[nbin,ndet,nvec]    matrix representing the scaled eivenvectors per bin
-void nmat_detvecs_apply(const bp::object & ft, const bp::object & bins, const bp::object & iD, const bp::object & iV, float s, float norm) {
+void nmat_detvecs_apply(const nb::object & ft, const nb::object & bins, const nb::object & iD, const nb::object & iV, float s, float norm) {
     // Should pass in this too
     BufferWrapper<float>               ft_buf  ("ft",   ft,   false, std::vector<int>{-1,-1});
     BufferWrapper<int32_t>             bins_buf("bins", bins, false, std::vector<int>{-1, 2});
@@ -89,7 +92,7 @@ void nmat_detvecs_apply(const bp::object & ft, const bp::object & bins, const bp
 // probably be moved into its own file.
 
 // Forward declarations of helper functions
-int get_dtype(const bp::object &);
+int get_dtype(const nb::object &);
 int pcut_full_measure_helper(const vector<RangesInt32> &);
 template <typename T> void pcut_full_tod2vals_helper(const vector<RangesInt32> &, T *, int, T *);
 template <typename T> void pcut_full_vals2tod_helper(const vector<RangesInt32> &, T *, int, T *);
@@ -125,14 +128,14 @@ template <typename T> void pcut_poly_translate_helper(const vector<RangesInt32>
 // cut range starts. This will be fast enough to build on the fly. Would pass this as an extra
 // argument to the helper functions.
 
-int process_cuts(const bp::object & range_matrix, const std::string & operation, const std::string & model, const bp::dict & params, const bp::object & tod, const bp::object & vals) {
+int process_cuts(const nb::object & range_matrix, const std::string & operation, const std::string & model, const nb::dict & params, const nb::object & tod, const nb::object & vals) {
     auto ranges = extract_ranges<int32_t>(range_matrix);
     // Decoding these up here lets us avoid some duplication later
     int resolution, nmax;
     if     (model == "full") {}
     else if(model == "poly") {
-        resolution = bp::extract<int>(params.get("resolution"));
-        nmax       = bp::extract<int>(params.get("nmax"));
+        resolution = nb::cast<int>(params["resolution"]);
+        nmax       = nb::cast<int>(params["nmax"]);
     } else throw ValueError_exception("process_cuts model can only be 'full' or 'poly'");
 
     if(operation == "measure") {
@@ -179,14 +182,14 @@ int process_cuts(const bp::object & range_matrix, const std::string & operation,
     return 0;
 }
 
-void translate_cuts(const bp::object & irange_matrix, const bp::object & orange_matrix, const std::string & model, const bp::dict & params, const bp::object & ivals, bp::object & ovals) {
+void translate_cuts(const nb::object & irange_matrix, const nb::object & orange_matrix, const std::string & model, const nb::dict & params, const nb::object & ivals, nb::object & ovals) {
     // Decoding these up here lets us avoid some duplication later
     int resolution, nmax;
     if       (model == "full") {
         // nothing to do - res and nmax not used here
     } else if(model == "poly") {
-        resolution = bp::extract<int>(params.get("resolution"));
-        nmax       = bp::extract<int>(params.get("nmax"));
+        resolution = nb::cast<int>(params["resolution"]);
+        nmax       = nb::cast<int>(params["nmax"]);
     } else {
         throw ValueError_exception("process_cuts model can only be 'full' or 'poly'");
     }
@@ -214,7 +217,7 @@ void translate_cuts(const bp::object & irange_matrix, const bp::object & orange_
 
 // Helpers for the cuts
 
-int get_dtype(const bp::object & arr) {
+int get_dtype(const nb::object & arr) {
     PyObject *ob = PyArray_FromAny(arr.ptr(), NULL, 0, 0, 0, NULL);
     if (ob == NULL) throw exception();
     PyArrayObject * a = reinterpret_cast<PyArrayObject*>(ob);
@@ -519,12 +522,12 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, T *data,
 }
 
 template <typename T>
-void get_gap_fill_poly(const bp::object ranges,
-                       const bp::object tod,
+void get_gap_fill_poly(const nb::object ranges,
+                       const nb::object tod,
                        int buffer,
                        int order,
                        bool inplace,
-                       const bp::object ex)
+                       const nb::object ex)
 {
     // As a test, copy data from rangemat into segment.
     auto rangemat = extract_ranges<int32_t>(ranges);
@@ -565,12 +568,12 @@ void get_gap_fill_poly(const bp::object ranges,
 }
 
 
-void test_buffer_wrapper(const bp::object array,
-                         const bp::object dims)
+void test_buffer_wrapper(const nb::object array,
+                         const nb::object dims)
 {
-    std::vector<int> _dims(bp::len(dims));
-    for (int i=0; i<bp::len(dims); i++)
-        _dims[i] = bp::extract<double>(dims[i]);
+    std::vector<int> _dims(nb::len(dims));
+    for (int i=0; i<nb::len(dims); i++)
+        _dims[i] = nb::cast<int>(dims[i]);
     BufferWrapper<double> array_buf  ("array",  array,  false, _dims);
 
 }
@@ -626,7 +629,7 @@ void _block_moment(T* tod_data, T* output, int bsize, int moment, bool central,
 }
 
 template <typename T>
-void block_moment(const bp::object & tod, const bp::object & out, int bsize, int moment, bool central, int shift)
+void block_moment(const nb::object & tod, const nb::object & out, int bsize, int moment, bool central, int shift)
 {
     BufferWrapper<T> tod_buf  ("tod",  tod,  false, std::vector<int>{-1, -1});
     int ndet = tod_buf->shape[0];
@@ -684,7 +687,7 @@ void _block_minmax(T* tod_data, T* output, int bsize, int mode, int ndet, int ns
 }
 
 template <typename T>
-void block_minmax(const bp::object & tod, const bp::object & out, int bsize, int mode, int shift)
+void block_minmax(const nb::object & tod, const nb::object & out, int bsize, int mode, int shift)
 {
     BufferWrapper<T> tod_buf  ("tod",  tod,  false, std::vector<int>{-1, -1});
     int ndet = tod_buf->shape[0];
@@ -726,7 +729,7 @@ void _clean_flag(int* flag_data, int width, int ndet, int nsamp)
     }
 }
 
-void clean_flag(const bp::object & flag, int width)
+void clean_flag(const nb::object & flag, int width)
 {
     BufferWrapper<int> flag_buf  ("flag", flag, false, std::vector<int>{-1, -1});
     int ndet = flag_buf->shape[0];
@@ -791,7 +794,7 @@ void _jumps_matched_filter(T* tod_data, T* output, int bsize, int shift, int nde
 }
 
 template <typename T>
-void matched_jumps(const bp::object & tod, const bp::object & out, const bp::object & min_size, int bsize)
+void matched_jumps(const nb::object & tod, const nb::object & out, const nb::object & min_size, int bsize)
 {
     BufferWrapper<T> tod_buf  ("tod",  tod,  false, std::vector<int>{-1, -1});
     int ndet = tod_buf->shape[0];
@@ -808,7 +811,7 @@ void matched_jumps(const bp::object & tod, const bp::object & out, const bp::obj
         throw buffer_exception("min_size must be C-contiguous along last axis");
     T* size = (T*)size_buf->buf;
     T* buffer = new T[ndet * nsamp];
-    
+
     int half_win = bsize / 2;
     int quarter_win = bsize / 4;
 
@@ -817,12 +820,12 @@ void matched_jumps(const bp::object & tod, const bp::object & out, const bp::obj
     // For this first round of cleaning we use min_size/2
     // Note that after this filtering we are left with at least win_size/4 width
     _jumps_thresh_on_mfilt(buffer, output, size, bsize, 0, (T).5, false, false, ndet, nsamp);
-    // Clean spurs 
+    // Clean spurs
     _clean_flag(output, quarter_win, ndet, nsamp);
     // Recall that we set _min_size to be half the actual peak min above
     // We allow for .5 samples worth of uncertainty here
     _jumps_thresh_on_mfilt(buffer, output, size, bsize, 0, (T)1., true, true, ndet, nsamp);
-    
+
     // Now do the shifted filter
     _jumps_matched_filter(tod_data, buffer, bsize, half_win, ndet, nsamp);
     int* shift_flag = new int[ndet * nsamp];
@@ -837,14 +840,14 @@ void matched_jumps(const bp::object & tod, const bp::object & out, const bp::obj
         int ioff = di*nsamp;
         for(int si = 0; si < nsamp; si++) {
             int i = ioff + si;
-            output[i] = output[i] || shift_flag[i]; 
+            output[i] = output[i] || shift_flag[i];
         }
     }
     delete shift_flag;
 }
 
 template <typename T>
-void find_quantized_jumps(const bp::object & tod, const bp::object & out, const bp::object & atol, int win_size, T scale)
+void find_quantized_jumps(const nb::object & tod, const nb::object & out, const nb::object & atol, int win_size, T scale)
 {
     BufferWrapper<T> tod_buf  ("tod",  tod,  false, std::vector<int>{-1, -1});
     int ndet = tod_buf->shape[0];
@@ -889,7 +892,7 @@ void find_quantized_jumps(const bp::object & tod, const bp::object & out, const
 }
 
 template <typename T>
-void subtract_jump_heights(const bp::object & tod, const bp::object & out, const bp::object & heights, const bp::object & jumps) {
+void subtract_jump_heights(const nb::object & tod, const nb::object & out, const nb::object & heights, const nb::object & jumps) {
     BufferWrapper<T> tod_buf  ("tod",  tod,  false, std::vector<int>{-1, -1});
     int ndet = tod_buf->shape[0];
     int nsamp = tod_buf->shape[1];
@@ -966,7 +969,7 @@ void _linear_interp(const double* x, const double* y, const double* x_interp,
         // Points above maximum value
         else if (x_interp[si] >= x_max) {
             y_interp[si] = y[n_x - 1] + slope_right * (x_interp[si] - x_max);
-        } 
+        }
         else {
             y_interp[si] = gsl_spline_eval(spline, x_interp[si], acc);
         }
@@ -974,8 +977,8 @@ void _linear_interp(const double* x, const double* y, const double* x_interp,
 }
 
 template <typename T>
-void _interp1d(const bp::object & x, const bp::object & y, const bp::object & x_interp,
-               bp::object & y_interp, const gsl_interp_type* interp_type,
+void _interp1d(const nb::object & x, const nb::object & y, const nb::object & x_interp,
+               nb::object & y_interp, const gsl_interp_type* interp_type,
                _interp_func_pointer<T> interp_func)
 {
     BufferWrapper<T> y_buf  ("y",  y,  false, std::vector<int>{-1, -1});
@@ -1039,7 +1042,7 @@ void _interp1d(const bp::object & x, const bp::object & y, const bp::object & x_
         // Transform x and x_interp to double arrays for gsl
         double x_dbl[n_x], x_interp_dbl[n_x_interp];
 
-        std::transform(x_data, x_data + n_x, x_dbl, 
+        std::transform(x_data, x_data + n_x, x_dbl,
                        [](float value) { return static_cast<double>(value); });
 
         std::transform(x_interp_data, x_interp_data + n_x_interp, x_interp_dbl,
@@ -1078,8 +1081,8 @@ void _interp1d(const bp::object & x, const bp::object & y, const bp::object & x_
     }
 }
 
-void interp1d_linear(const bp::object & x, const bp::object & y,
-                     const bp::object & x_interp, bp::object & y_interp)
+void interp1d_linear(const nb::object & x, const nb::object & y,
+                     const nb::object & x_interp, nb::object & y_interp)
 {
     // Get data type
     int dtype = get_dtype(y);
@@ -1216,7 +1219,7 @@ void _detrend(T* data, const int ndets, const int nsamps, const int row_stride,
 }
 
 template <typename T>
-void _detrend_buffer(bp::object & tod, const std::string & method,
+void _detrend_buffer(nb::object & tod, const std::string & method,
                      const int linear_ncount)
 {
     BufferWrapper<T> tod_buf  ("tod",  tod,  false, std::vector<int>{-1, -1});
@@ -1244,7 +1247,7 @@ void _detrend_buffer(bp::object & tod, const std::string & method,
     _detrend<T>(tod_data, ndets, nsamps, row_stride, method, linear_ncount, nthreads);
 }
 
-void detrend(bp::object & tod, const std::string & method, const int linear_ncount)
+void detrend(nb::object & tod, const std::string & method, const int linear_ncount)
 {
     // Get data type
     int dtype = get_dtype(tod);
@@ -1260,162 +1263,267 @@ void detrend(bp::object & tod, const std::string & method, const int linear_ncou
     }
 }
 
-PYBINDINGS("so3g")
-{
-    bp::def("nmat_detvecs_apply", nmat_detvecs_apply);
-    bp::def("process_cuts",  process_cuts);
-    bp::def("translate_cuts", translate_cuts);
-    bp::def("get_gap_fill_poly",  get_gap_fill_poly<float>,
-            "get_gap_fill_poly(ranges, signal, buffer, order, extract)\n"
-            "\n"
-            "Do polynomial gap-filling on a float32 array.\n"
-            "\n"
-            "Args:\n"
-            "  ranges: RangesMatrix with shape (ndet, nsamp)\n"
-            "  signal: data array (float32) with shape (ndet, nsamp)\n"
-            "  buffer: integer stating max number of samples to use on each end\n"
-            "  order: order of polynomial to use (1 means linear)\n"
-            "  inplace: whether to overwrite data array with the model\n"
-            "  extract: array to write the original data samples (inplace)\n"
-            "    or the model (!inplace) into.\n");
-    bp::def("get_gap_fill_poly64",  get_gap_fill_poly<double>,
-            "get_gap_fill_poly64(ranges, signal, buffer, order, extract)\n"
-            "\n"
-            "Do polynomial gap-filling for float64 data.\n"
-            "\n"
-            "See details in docstring for get_gap_fill_poly.\n");
-    bp::def("test_buffer_wrapper", test_buffer_wrapper,
-            "Pass array and list of dims to match against its shape.");
-    bp::def("block_moment", block_moment<float>,
-            "block_moment(tod, out, bsize, moment, central, shift)\n"
-            "\n"
-            "Compute the nth moment in blocks on a float32 array.\n"
-            "\n"
-            "Args:\n"
-            "  tod: data array (float32) with shape (ndet, nsamp)\n"
-            "  out: output array (float32) with shape (ndet, nsamp)\n"
-            "       can be the same as tod\n"
-            "  bsize: number of samples in each block\n"
-            "  moment: moment to compute, should be >= 1\n"
-            "  central: whether to compute the central moment in each block\n"
-            "  shift: sample to start block at, used in each row\n");
-    bp::def("block_moment64", block_moment<double>,
-            "block_moment64(tod, out, bsize, moment, central, shift)\n"
-            "\n"
-            "Compute the nth moment in blocks on a float32 array.\n"
-            "\n"
-            "See details in docstring for block_moment.\n");
-    bp::def("block_minmax", block_minmax<float>,
-            "block_minmax(tod, out, bsize, mode, shift)\n"
-            "\n"
-            "Compute the minimum, maximum, or peak to peak in blocks on a float32 array.\n"
-            "\n"
-            "Args:\n"
-            "  tod: data array (float32) with shape (ndet, nsamp)\n"
-            "  out: output array (float32) with shape (ndet, nsamp)\n"
-            "       can be the same as tod\n"
-            "  bsize: number of samples in each block\n"
-            "  mode: if 0 compute the block minimum, if 1 the maximum, anything else will compute the peak to peak\n"
-            "  shift: sample to start block at, used in each row\n");
-    bp::def("block_minmax64", block_minmax<double>,
-            "block_minmax64(tod, out, bsize, mode, shift)\n"
-            "\n"
-            "Compute the minimum, maximum, or peak to peak in blocks on a float64 array.\n"
-            "\n"
-            "See details in docstring for block_minmax.\n");
-    bp::def("matched_jumps", matched_jumps<float>,
-            "matched_jumps(tod, out, min_size, bsize)\n"
-            "\n"
-            "Flag jumps with the matched filter for a unit jump in a float32 array.\n"
-            "\n"
-            "Args:\n"
-            "  tod: data array (float32) with shape (ndet, nsamp)\n"
-            "  out: output array (int32) with shape (ndet, nsamp)\n"
-            "  min_size: minimum jump size for each det, shape (ndet,)\n"
-            "  bsize: number of samples in each block\n");
-    bp::def("matched_jumps64", matched_jumps<double>,
-            "matched_jumps64(tod, out, min_size, bsize)\n"
-            "\n"
-            "Flag jumps with the matched filter for a unit jump in a float64 array.\n"
-            "\n"
-            "See details in docstring for matched_jumps.\n");
-    bp::def("find_quantized_jumps", find_quantized_jumps<float>,
-            "find_quantized_jumps(tod, out, atol, win_size, scale)"
-            "\n"
-            "Search for jumps that are a multiple of a known value in a float32 array.\n"
-            "Output will be 0 where jumps are not found and the assumed jump height where jumps are found.\n"
-            "\n"
-            "Args:\n"
-            "  tod: data array (float32) with shape (ndet, nsamp)\n"
-            "  out: output array (float32) with shape (ndet, nsamp)\n"
-            "  atol: how close to the multiple of scale a value needs to be to be a jump in the same units as the signal\n"
-            "        should be an array (float32) with shape (ndet,)\n"
-            "  win_size: size of window to use as buffer when differencing\n"
-            "  scale: the scale of jumps to look for\n");
-    bp::def("find_quantized_jumps64", find_quantized_jumps<double>,
-            "find_quantized_jumps64(tod, out, atol, win_size, scale)\n"
-            "\n"
-            "Search for jumps that are a multiple of a known value in a float64 array.\n"
-            "Output will be 0 where jumps are not found and the assumed jump height where jumps are found.\n"
-            "\n"
-            "See details in docstring for find_quantized_jumps.\n");
-    bp::def("subtract_jump_heights", subtract_jump_heights<float>,
-            "subtract_jump_heights(tod, out, heights, jumps)"
-            "\n"
-            "For each detector, compute the cumulative effect of the jumps identified by the array 'heights' and the RangesMatrix 'jumps'."
-            "For each range in 'jumps', the values from 'heights' are checked and the size of the jump is either the largest positive"
-            "or the largest negative number (whichever has the largest absolute value)."
-            "The 'output' value is the difference of 'tod' and the accumulated jump vector."
-            "\n"
-            "Args:\n"
-            "  tod: data array (float32) with shape (ndet, nsamp)\n"
-            "  out: output array (float32) with shape (ndet, nsamp)\n"
-            "       can be the same as tod\n"
-            "  heights: the height of the jump at each samples\n"
-            "           should be an array (float32) with shape (ndet, nsamp)\n"
-            "  jumps: RangesMatrix with the jump locations and shape (ndet, nsamp).\n");
-    bp::def("subtract_jump_heights64", subtract_jump_heights<double>,
-            "subtract_jump_heights64(tod, out, heights, jumps)"
-            "\n"
-            "Subtract fit jump heights from known jump locatations in a float64 array."
-            "If multiple samples in a jump have different heights, the largest height is used.\n"
-            "\n"
-            "See details in docstring for subtract_jump_heights.\n");
-    bp::def("clean_flag", clean_flag,
-            "clean_flag(flag, width)"
-            "\n"
-            "Clean a flag inplace by unflagging regions without enough contiguous flagged values.\n"
-            "\n"
-            "Args:\n"
-            "  flag: flag array (int) with shape (ndet, nsamp)\n"
-            "  width: the minimum number of contiguous flagged samples\n");
-    bp::def("interp1d_linear", interp1d_linear,
-            "interp1d_linear(x, y, x_interp, y_interp)"
-            "\n"
-            "Perform linear interpolation over rows of float32 or float64 array with GSL.\n"
-            "This function uses OMP to parallelize over the dets (rows) axis.\n"
-            "Vector x must be strictly increasing. Values for x_interp beyond the "
-            "domain of x will be computed based on extrapolation."
-            "\n"
-            "Args:\n"
-            "  x: independent variable (float32/float64) of data with shape (nsamp,)\n"
-            "  y: data array (float32/float64) with shape (ndet, nsamp)\n"
-            "  x_interp: independent variable (float32/float64) for interpolated data "
-            "            with shape (nsamp_interp,)\n"
-            "  y_interp: interpolated data array (float32/float64) output buffer to be modified "
-            "            with shape (ndet, nsamp_interp)\n");
-    bp::def("detrend", detrend,
-            "detrend(tod, method, ncount)"
-            "\n"
-            "Detrend each row of an array (float32/float64). This function uses OMP to parallelize "
-            "over the dets (rows) axis."
-            "\n"
-            "Args:\n"
-            "  tod: input array (float32/float64) buffer with shape (ndet, nsamp) that is to be detrended. "
-            "       The data is modified in place.\n"
-            "  method: how to detrend data.  Options are 'mean', 'median', and 'linear'. Linear calculates "
-            "          and subtracts the slope from either end of each row as determined from 'linear_ncount'.\n"
-            "  linear_ncount: Number (int) of samples to use on each end, when measuring mean level for 'linear'"
-            "                 detrend. Must be a positive integer or -1.  If -1, nsamps / 2 will be used. Values "
-            "                 larger than 1 suppress the influence of white noise.\n");
-}
\ No newline at end of file
+
+void register_array_ops(nb::module_ & m) {
+    m.def("nmat_detvecs_apply", &nmat_detvecs_apply);
+    m.def("process_cuts", &process_cuts);
+    m.def("translate_cuts", &translate_cuts);
+    m.def("get_gap_fill_poly", &get_gap_fill_poly<float>,
+        nb::arg("ranges"),
+        nb::arg("signal"),
+        nb::arg("buffer"),
+        nb::arg("order"),
+        nb::arg("inplace"),
+        nb::arg("extract"),
+        R"(
+        Do polynomial gap-filling on a float32 array.
+
+        Args:
+            ranges: RangesMatrix with shape (ndet, nsamp)
+            signal: data array (float32) with shape (ndet, nsamp)
+            buffer: integer stating max number of samples to use on each end
+            order: order of polynomial to use (1 means linear)
+            inplace: whether to overwrite data array with the model
+            extract: array to write the original data samples (inplace)
+                or the model (!inplace) into.
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("get_gap_fill_poly64", &get_gap_fill_poly<double>,
+        R"(
+        Do polynomial gap-filling on a float64 array.
+
+        See details in docstring for get_gap_fill_poly.
+        )"
+    );
+    m.def("test_buffer_wrapper", &test_buffer_wrapper,
+        R"(
+        Pass array and list of dims to match against its shape.
+        )"
+    );
+    m.def("block_moment", &block_moment<float>,
+        nb::arg("tod"),
+        nb::arg("out"),
+        nb::arg("bsize"),
+        nb::arg("moment"),
+        nb::arg("central"),
+        nb::arg("shift"),
+        R"(
+        Compute the nth moment in blocks on a float32 array.
+
+        Args:
+            tod: data array (float32) with shape (ndet, nsamp)
+            out: output array (float32) with shape (ndet, nsamp)
+                can be the same as tod
+            bsize: number of samples in each block
+            moment: moment to compute, should be >= 1
+            central: whether to compute the central moment in each block
+            shift: sample to start block at, used in each row
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("block_moment64", &block_moment<double>,
+        R"(
+        Compute the nth moment in blocks on a float64 array
+
+        See details in docstring for block_moment.
+        )"
+    );
+    m.def("block_minmax", &block_minmax<float>,
+        nb::arg("tod"),
+        nb::arg("out"),
+        nb::arg("bsize"),
+        nb::arg("mode"),
+        nb::arg("shift"),
+        R"(
+        Compute the minimum, maximum, or peak to peak in blocks on a float32 array.
+
+        Args:
+            tod: data array (float32) with shape (ndet, nsamp)
+            out: output array (float32) with shape (ndet, nsamp)
+                can be the same as tod
+            bsize: number of samples in each block
+            mode: if 0 compute the block minimum, if 1 the maximum, anything else will
+                compute the peak to peak
+            shift: sample to start block at, used in each row
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("block_minmax64", &block_minmax<double>,
+        R"(
+        Compute the minimum, maximum, or peak to peak in blocks on a float64 array.
+
+        See details in docstring for block_minmax.
+        )"
+    );
+    m.def("matched_jumps", &matched_jumps<float>,
+        nb::arg("tod"),
+        nb::arg("out"),
+        nb::arg("min_size"),
+        nb::arg("bsize"),
+        R"(
+        Flag jumps with the matched filter for a unit jump in a float32 array.
+
+        Args:
+            tod: data array (float32) with shape (ndet, nsamp)
+            out: output array (int32) with shape (ndet, nsamp)
+            min_size: minimum jump size for each det, shape (ndet,)
+            bsize: number of samples in each block
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("matched_jumps64", &matched_jumps<double>,
+        R"(
+        Flag jumps with the matched filter for a unit jump in a float64 array.
+
+        See details in docstring for matched_jumps.
+        )"
+    );
+    m.def("find_quantized_jumps", &find_quantized_jumps<float>,
+        nb::arg("tod"),
+        nb::arg("out"),
+        nb::arg("atol"),
+        nb::arg("win_size"),
+        nb::arg("scale"),
+        R"(
+        Search for jumps that are a multiple of a known value in a float32 array.
+
+        Output will be 0 where jumps are not found and the assumed jump height where
+        jumps are found.
+
+        Args:
+            tod: data array (float32) with shape (ndet, nsamp)
+            out: output array (float32) with shape (ndet, nsamp)
+            atol: how close to the multiple of scale a value needs to be to be a jump
+                in the same units as the signal. should be an array (float32) with
+                shape (ndet,)
+            win_size: size of window to use as buffer when differencing
+            scale: the scale of jumps to look for
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("find_quantized_jumps64", &find_quantized_jumps<double>,
+        R"(
+        Search for jumps that are a multiple of a known value in a float64 array.
+
+        See details in docstring for find_quantized_jumps.
+        )"
+    );
+    m.def("subtract_jump_heights", &subtract_jump_heights<float>,
+        nb::arg("tod"),
+        nb::arg("out"),
+        nb::arg("heights"),
+        nb::arg("jumps"),
+        R"(
+        Subtract cumulative jumps from a float32 TOD.
+
+        For each detector, compute the cumulative effect of the jumps identified by the
+        array 'heights' and the RangesMatrix 'jumps'.  For each range in 'jumps', the
+        values from 'heights' are checked and the size of the jump is either the
+        largest positive or the largest negative number (whichever has the largest
+        absolute value).  The 'output' value is the difference of 'tod' and the
+        accumulated jump vector.
+
+        Args:
+            tod: data array (float32) with shape (ndet, nsamp)
+            out: output array (float32) with shape (ndet, nsamp)
+                can be the same as tod
+            heights: the height of the jump at each samples
+                should be an array (float32) with shape (ndet, nsamp)
+            jumps: RangesMatrix with the jump locations and shape (ndet, nsamp).
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("subtract_jump_heights64", &subtract_jump_heights<double>,
+        R"(
+        Subtract cumulative jumps from a float64 TOD.
+
+        See details in docstring for subtract_jump_heights.
+        )"
+    );
+    m.def("clean_flag", &clean_flag,
+        nb::arg("flag"),
+        nb::arg("width"),
+        R"(
+        Unflag in-place regions without sufficient contiguous flagged values.
+
+        Args:
+            flag: flag array (int) with shape (ndet, nsamp)
+            width: the minimum number of contiguous flagged samples
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("interp1d_linear", &interp1d_linear,
+        nb::arg("x"),
+        nb::arg("y"),
+        nb::arg("x_interp"),
+        nb::arg("y_interp"),
+        R"(
+        Perform linear interpolation over rows of float32 or float64 array with GSL.
+
+        This function uses OMP to parallelize over the dets (rows) axis.
+        Vector x must be strictly increasing. Values for x_interp beyond the
+        domain of x will be computed based on extrapolation.
+
+        Args:
+            x: independent variable (float32/float64) of data with shape (nsamp,)
+            y: data array (float32/float64) with shape (ndet, nsamp)
+            x_interp: independent variable (float32/float64) for interpolated data
+                with shape (nsamp_interp,)
+            y_interp: interpolated data array (float32/float64) output buffer to be
+                modified with shape (ndet, nsamp_interp)
+
+        Returns:
+            None
+
+        )"
+    );
+    m.def("detrend", &detrend,
+        nb::arg("tod"),
+        nb::arg("method"),
+        nb::arg("linear_ncount"),
+        R"(
+        Detrend each row of an array (float32/float64).
+
+        This function uses OMP to parallelize over the dets (rows) axis.
+
+        Args:
+            tod: input array (float32/float64) buffer with shape (ndet, nsamp) that is
+                to be detrended.  The data is modified in place.
+            method: how to detrend data.  Options are 'mean', 'median', and 'linear'.
+                Linear calculates and subtracts the slope from either end of each row
+                as determined from 'linear_ncount'.
+            linear_ncount: Number (int) of samples to use on each end, when measuring
+                mean level for 'linear' detrend. Must be a positive integer or -1.  If
+                -1, nsamps / 2 will be used. Values larger than 1 suppress the
+                influence of white noise.
+
+        Returns:
+            None
+
+        )"
+    );
+
+    return;
+}
diff --git a/src/exceptions.cxx b/src/exceptions.cxx
index 3bd49238..9b392b78 100644
--- a/src/exceptions.cxx
+++ b/src/exceptions.cxx
@@ -1,32 +1,11 @@
-#include <boost/python/exception_translator.hpp>
 
-#include <container_pybindings.h>
 #include "exceptions.h"
 
-// Here we define the map from C++ exceptions defined in exceptions.h
-// to Python exceptions.  Currently we use built-in python exceptions,
-// with informative error messages.
-
-static void translate_RuntimeError(so3g_exception const& e)
-{
-    PyErr_SetString(PyExc_RuntimeError, e.msg_for_python().c_str());
-}
-
-static void translate_TypeError(so3g_exception const& e)
-{
-    PyErr_SetString(PyExc_TypeError, e.msg_for_python().c_str());
-}
-
-static void translate_ValueError(so3g_exception const& e)
-{
-    PyErr_SetString(PyExc_ValueError, e.msg_for_python().c_str());
-}
+namespace nb = nanobind;
 
 
-namespace bp = boost::python;
-PYBINDINGS("so3g")
-{
-    bp::register_exception_translator<RuntimeError_exception> (&translate_RuntimeError);
-    bp::register_exception_translator<TypeError_exception> (&translate_TypeError);
-    bp::register_exception_translator<ValueError_exception> (&translate_ValueError);
+void register_exceptions(nb::module_ & m) {
+    nb::exception<RuntimeError_exception>(m, "SO3G_RuntimeError", PyExc_RuntimeError);
+    nb::exception<TypeError_exception>(m, "SO3G_TypeError", PyExc_TypeError);
+    nb::exception<ValueError_exception>(m, "SO3G_ValueError", PyExc_ValueError);
 }
diff --git a/src/fitting_ops.cxx b/src/fitting_ops.cxx
index 11b68a9a..236a2731 100644
--- a/src/fitting_ops.cxx
+++ b/src/fitting_ops.cxx
@@ -1,5 +1,4 @@
 #define NO_IMPORT_ARRAY
-#define GLOG_USE_GLOG_EXPORT
 
 #include <stdint.h>
 #include <stdio.h>
@@ -7,14 +6,16 @@
 #include <string>
 #include <cmath>
 
-#include <boost/python.hpp>
+#ifdef _OPENMP
 #include <omp.h>
+#endif // ifdef _OPENMP
+
+#include <nanobind/nanobind.h>
 
 #include <gsl/gsl_statistics.h>
 #include <glog/logging.h>
 #include <ceres/ceres.h>
 
-#include <pybindings.h>
 #include "so3g_numpy.h"
 #include "numpy_assist.h"
 #include "Ranges.h"
@@ -319,8 +320,8 @@ void _fit_noise(const double* f, const double* log_f, const double* pxx,
 }
 
 template <typename T>
-void _fit_noise_buffer(const bp::object & f, const bp::object & pxx,
-                       bp::object & p, bp::object & c, const double lowf,
+void _fit_noise_buffer(const nb::object & f, const nb::object & pxx,
+                       nb::object & p, nb::object & c, const double lowf,
                        const double fwhite_lower, const double fwhite_upper,
                        const double tol, const int niter, const double epsilon)
 {
@@ -424,9 +425,9 @@ void _fit_noise_buffer(const bp::object & f, const bp::object & pxx,
     google::ShutdownGoogleLogging();
 }
 
-void fit_noise(const bp::object & f, const bp::object & pxx, bp::object & p, bp::object & c,
-               const double lowf, const double fwhite_lower, const double fwhite_upper,
-               const double tol, const int niter, const double epsilon)
+void fit_noise(const nb::object & f, const nb::object & pxx, nb::object & p,
+    nb::object & c, const double lowf, const double fwhite_lower, const double
+    fwhite_upper, const double tol, const int niter, const double epsilon)
 {
     // Get data type
     int dtype = get_dtype(pxx);
@@ -443,30 +444,52 @@ void fit_noise(const bp::object & f, const bp::object & pxx, bp::object & p, bp:
 }
 
 
-PYBINDINGS("so3g")
-{
-     bp::def("fit_noise", fit_noise,
-             "fit_noise(f, pxx, p, c, lowf, fwhite_lower, fwhite_upper, tol, niter, epsilon)"
-             "\n"
-             "Fits noise model with white and 1/f components to the PSD of signal. Uses a MLE\n"
-             "method that minimizes a log-likelihood. OMP is used to parallelize across dets (rows)."
-             "\n"
-             "Args:\n"
-             "  f: frequency array (float32/64) with dimensions (nsamps,).\n"
-             "     Should be positive definite and strictly increasing.\n"
-             "  pxx: PSD array (float32/64) with dimensions (ndets, nsamps).\n"
-             "  p: output parameter array (float32/64) with dimensions (ndets, nparams).\n"
-             "     This is modified in place and input values are ignored.\n"
-             "  c: output uncertaintiy array (float32/64) with dimensions (ndets, nparams).\n"
-             "     This is modified in place and input values are ignored.\n"
-             "  lowf: Frequency below which the 1/f noise index and fknee are estimated for initial\n"
-             "        guess passed to least_squares fit (float64).\n"
-             "  fwhite_lower: Lower frequency used to estimate white noise for initial guess passed to\n"
-             "                least_squares fit (float64).  Should be < fwhite_upper and >= lowf.\n"
-             "  fwhite_upper: Upper frequency used to estimate white noise for initial guess passed to\n"
-             "                least_squares fit (float64).  Should be > fwhite_lower and lowf.\n"
-             "  tol: absolute tolerance for minimization (float64).\n"
-             "  niter: total number of iterations to run minimization for (int).\n"
-             "  epsilon: Value to perturb gradients by when calculating uncertainties with the inverse\n"
-             "           Hessian matrix (float64). Affects minimization only.\n");
-}
\ No newline at end of file
+void register_fitting_ops(nb::module_ & m) {
+    m.def("fit_noise", &fit_noise,
+        nb::arg("f"),
+        nb::arg("pxx"),
+        nb::arg("p"),
+        nb::arg("c"),
+        nb::arg("lowf"),
+        nb::arg("fwhite_lower"),
+        nb::arg("fwhite_upper"),
+        nb::arg("tol"),
+        nb::arg("niter"),
+        nb::arg("epsilon"),
+        R"(
+
+        Fits noise model with white and 1/f components to the PSD of signal. Uses a
+        MLE method that minimizes a log-likelihood. OMP is used to parallelize across
+        dets (rows).
+
+        Args:
+            f (array): frequency array (float32/64) with dimensions (nsamps,).
+                Should be positive definite and strictly increasing.
+            pxx (array): PSD array (float32/64) with dimensions (ndets, nsamps).
+            p (array): output parameter array (float32/64) with dimensions
+                (ndets, nparams). This is modified in place and input values are
+                ignored.
+            c (array): output uncertaintiy array (float32/64) with dimensions
+                (ndets, nparams). This is modified in place and input values are
+                ignored.
+            lowf (float): Frequency below which the 1/f noise index and fknee are
+                estimated for initial guess passed to least_squares fit.
+            fwhite_lower (float): Lower frequency used to estimate white noise for
+                initial guess passed to least_squares fit.  Should be < fwhite_upper
+                and >= lowf.
+            fwhite_upper (float): Upper frequency used to estimate white noise for
+                initial guess passed to least_squares fit.  Should be > fwhite_lower
+                and lowf.
+            tol (float): absolute tolerance for minimization.
+            niter (int): total number of iterations to run minimization for.
+            epsilon (float): Value to perturb gradients by when calculating
+                uncertainties with the inverse Hessian matrix. Affects minimization
+                only.
+
+        Returns:
+            None
+
+        )"
+    );
+    return;
+}
diff --git a/src/hkagg.cxx b/src/hkagg.cxx
index 56df41b5..0e7732b2 100644
--- a/src/hkagg.cxx
+++ b/src/hkagg.cxx
@@ -1,60 +1,15 @@
-#include <pybindings.h>
 
 #include <iostream>
-#include <boost/python.hpp>
 
-#include <container_pybindings.h>
 #include <hkagg.h>
 
+namespace nb = nanobind;
 
-/* IrregBlockDouble */
-
-std::string IrregBlockDouble::Description() const
-{
-	std::ostringstream s;
-	s << "Double data (" << data.size() << " vectors) with timestamp.";
-	return s.str();
-}
-
-std::string IrregBlockDouble::Summary() const
-{
-    return Description();
-}
-
-template <class A> void IrregBlockDouble::serialize(A &ar, unsigned v)
-{
-	using namespace cereal;
-        // v is the version code!
-
-	ar & make_nvp("G3FrameObject", base_class<G3FrameObject>(this));
-	ar & make_nvp("prefix", prefix);
-	ar & make_nvp("t", t);
-	ar & make_nvp("data", data);
-}
-
-
-G3_SERIALIZABLE_CODE(IrregBlockDouble);
-
-
-namespace bp = boost::python;
-
-PYBINDINGS("so3g")
-{
-    EXPORT_FRAMEOBJECT(IrregBlockDouble, init<>(),
-    "Data block for irregularly sampled data.")
-    .def_readwrite("prefix", &IrregBlockDouble::prefix,
-    "Prefix for field names.")
-    .def_readwrite("data", &IrregBlockDouble::data,
-    "Map to HK data vectors.")
-    .def_readwrite("t", &IrregBlockDouble::t,
-    "Timestamp vector.")
-    ;
-
-    bp::enum_<HKFrameType>("HKFrameType",
-                           "Identifier for generic HK streams.")
-        .value("session", HKFrameType::session)
-        .value("status",  HKFrameType::status)
-        .value("data",    HKFrameType::data)
-        ;
 
+void register_hkagg(nb::module_ & m) {
+    nb::enum_<HKFrameType>(m, "HKFrameType", "Identifier for generic HK streams.")
+    .value("session", HKFrameType::session)
+    .value("status", HKFrameType::status)
+    .value("data", HKFrameType::data)
+    .export_values();
 }
diff --git a/src/main.cxx b/src/main.cxx
index 97b761ca..4fc323be 100644
--- a/src/main.cxx
+++ b/src/main.cxx
@@ -1,14 +1,14 @@
-#include <boost/python.hpp>
 #ifdef _OPENMP
 # include <omp.h>
 #endif // ifdef _OPENMP
 
-// See this header file for discussion of numpy compilation issues.
-#include "so3g_numpy.h"
+#include <string>
 
-#include <container_pybindings.h>
+#include <nanobind/nanobind.h>
+#include <nanobind/stl/string.h>
 
-#include <string>
+// See this header file for discussion of numpy compilation issues.
+#include "so3g_numpy.h"
 
 // Note _version.h is supposed to be auto-generated during build.  If
 // that breaks at some point, you can replace it with a single
@@ -16,14 +16,28 @@
 //   #define SO3G_VERSION_STRING "unknown"
 #include "_version.h"
 
-namespace bp = boost::python;
+// Include headers with registration functions
+#include "exceptions.h"
+#include "hkagg.h"
+#include "so_linterp.h"
+#include "Butterworth.h"
+#include "Intervals.h"
+#include "Ranges.h"
+#include "Projection.h"
+
+// Declaration here, since there is no header file for array_ops.
+void register_array_ops(nb::module_ & m);
+
+
+namespace nb = nanobind;
+
 
 const std::string version()
 {
     return SO3G_VERSION_STRING;
 }
 
-bp::object useful_info() {
+nb::object useful_info() {
     int omp_num_threads = 1;
 #pragma omp parallel
     {
@@ -32,26 +46,31 @@ bp::object useful_info() {
             omp_num_threads = omp_get_num_threads();
         #endif
     }
-    bp::dict output;
+    nb::dict output;
     output["omp_num_threads"] = omp_num_threads;
     output["version"] = version();
     return output;
 }
 
 
-
-
-PYBINDINGS("so3g") {
-    bp::def("version", version);
-    bp::def("useful_info", useful_info);
-}
-
 static void* _so3g_import_array() {
     import_array();
     return NULL;
 }
 
-BOOST_PYTHON_MODULE(so3g) {
+
+NB_MODULE(libso3g, m) {
     _so3g_import_array();
-    G3ModuleRegistrator::CallRegistrarsFor("so3g");
+
+    m.def("version", &version);
+    m.def("useful_info", &useful_info);
+
+    register_exceptions(m);
+    register_hkagg(m);
+    register_butterworth(m);
+    register_so_linterp(m);
+    register_intervals(m);
+    register_ranges(m);
+    register_array_ops(m);
+    register_projection(m);
 }
diff --git a/src/so_linterp.cxx b/src/so_linterp.cxx
index 2758f4e0..847ee86c 100644
--- a/src/so_linterp.cxx
+++ b/src/so_linterp.cxx
@@ -1,9 +1,9 @@
 #include <so_linterp.h>
-#include <pybindings.h>
 
 #include <cmath>
-#include <boost/python.hpp>
-namespace bp = boost::python;
+
+namespace nb = nanobind;
+
 
 double test_trig(int table_size, int verbose)
 {
@@ -36,8 +36,8 @@ double test_trig(int table_size, int verbose)
     return worst;
 }
 
-PYBINDINGS("so3g")
-{
-    bp::def("test_trig", test_trig,
-        "For use in test suite -- determines worst arctrig discrepancy, in radians.");
+
+void register_so_linterp(nb::module_ & m) {
+    m.def("test_trig", &test_trig);
+    return;
 }
diff --git a/test/test_butterworth.py b/test/test_butterworth.py
index 1d469f9a..2178d91a 100644
--- a/test/test_butterworth.py
+++ b/test/test_butterworth.py
@@ -5,20 +5,21 @@
 
 import ctypes
 
+
 bank = so3g.BFilterBank() \
            .add(so3g.BFilterParams(32092,15750,14,3,5)) \
            .add(so3g.BFilterParams(31238,14895,14,3,12)) \
            .init(1)
 
 n = 100
-a1 = np.ones(n, 'int32')
-b1 = 0*a1
+a1 = np.ones(n, np.int32)
+b1 = 0 * a1
 
 bank.apply(a1, b1)
 assert np.all(b1[::10] == [   0,    5,   45,  151,  329,  560,  809, 1042, 1230, 1357])
 
-a2 = a1.astype('float32')
-b2 = a2*0
+a2 = a1.astype(np.float32)
+b2 = a2 * 0
 
 bank.init(1)
 bank.apply(a2, b2)
@@ -29,32 +30,32 @@
 with pytest.raises(TypeError) as einfo:
     bank.apply(a2[::2], b2[::2])
 
-print('Successul exception:', einfo)    
+print('Successul exception:', einfo)
 
 
 # Fail if arrays not same type?
 with pytest.raises(RuntimeError) as einfo:
-    bank.apply(a2, b2.astype('float64'))
-    
+    bank.apply(a2, b2.astype(np.float64))
+
 print('Successful exception:', einfo)
 
 
 # Fail if arrays not an eligible type?
 with pytest.raises(ValueError) as einfo:
-    bank.apply(a2.astype('float64'), b2.astype('float64'))
-    
+    bank.apply(a2.astype(np.float64), b2.astype(np.float64))
+
 print('Successful exception:', einfo)
 
 
 # Fail if arrays not right shape?
 with pytest.raises(RuntimeError) as einfo:
     bank.apply(a2[None,:], b2[None,:])
-    
+
 print('Successful exception:', einfo)
 
 
 # Fail if arrays not same shape?
 with pytest.raises(RuntimeError) as einfo:
     bank.apply(a2, b2[:-1])
-    
+
 print('Successful exception:', einfo)
diff --git a/test/test_intervals.py b/test/test_intervals.py
index ba93416b..2ce31847 100644
--- a/test/test_intervals.py
+++ b/test/test_intervals.py
@@ -14,7 +14,6 @@ def length_tests(iv, rows, indent_text='    '):
 for dtype in [
         so3g.IntervalsDouble,
         so3g.IntervalsInt,
-        so3g.IntervalsTime,
 ]:
     print('    ', dtype)
     o = dtype()
@@ -66,7 +65,7 @@ def length_tests(iv, rows, indent_text='    '):
               ivx.domain)
         assert( lo >= max(lo0, lo1) and
                 (hi==lo or hi <= min(hi0, hi1)) )
-        
+
 print()
 print('Testing import.')
 iv0 = so3g.IntervalsDouble()\
@@ -75,9 +74,14 @@ def length_tests(iv, rows, indent_text='    '):
 
 
 iv1 = iv0.copy()
+print('iv0 = ', iv0)
+print('iv1 = ', iv1, flush=True)
+print('iv0.array() = ', iv0.array(), flush=True)
+print('iv1.array() = ', iv1.array(), flush=True)
 assert(np.all(iv0.array() == iv1.array()))
 
 iv1 = so3g.IntervalsDouble.from_array(iv0.array())
+print('iv1 = iv0.from_array(): ', iv1, flush=True)
 assert(np.all(iv0.array() == iv1.array()))
 
 
@@ -96,17 +100,9 @@ def length_tests(iv, rows, indent_text='    '):
 assert(len((iv2 - iv1).array()) == 4)
 
 
-print('Sanity check on G3Time')
-ti = so3g.IntervalsTime()\
-    .add_interval(core.G3Time('2018-1-1T00:00:00'),
-                  core.G3Time('2018-1-2T00:00:00'))
-print('    ', ti)
-print('    ', ti.array())
-print('    ', (-ti).array())
-
 print()
 print('Interval <-> mask testing')
-mask = np.zeros(20, 'uint16')
+mask = np.zeros(20, np.uint16)
 n_bit, target_bit = 16, 12
 for ikill, nint in [(None, 0),
                     (19,   1),
@@ -125,6 +121,7 @@ def length_tests(iv, rows, indent_text='    '):
 
 print('... to mask.')
 mask1 = so3g.IntervalsInt.mask(iv,-1)
+print(f"mask = {mask}, mask1 = {mask1}", flush=True)
 assert(np.all(mask == mask1))
 
 print('...bit-width checking works?')