Analyzing SEC-SAXS data
************************

The following examples show how to carry out analysis on SEC-SAXS data.

Finding and setting buffer and sample regions
+++++++++++++++++++++++++++++++++++++++++++++++

.. code-block:: python

    import glob
    import os
    import bioxtasraw.RAWAPI as raw

    #Build the series from the individual profiles, in sorted filename order
    data_files = glob.glob('./series_data/sec_sample_1/profile_001_*.dat')
    data_files.sort()
    series = raw.profiles_to_series(raw.load_profiles(data_files))

    #Automatically locate a buffer region, then apply it to the series.
    #Ranges are passed as a list of [start, end] pairs.
    buf_found, buf_start, buf_end = raw.find_buffer_range(series)

    buffer_results = raw.set_buffer_range(series, [[buf_start, buf_end]])
    (sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer, vpmw) = buffer_results

    #Automatically locate a sample region, then apply it to the series to
    #get the subtracted profile for that region
    smp_found, smp_start, smp_end = raw.find_sample_range(series)

    sub_profile = raw.set_sample_range(series, [[smp_start, smp_end]])

    #Save the series, including the analysis done above, creating the
    #output directory first if it does not exist yet
    results_dir = './api_results'

    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    raw.save_series(series, 'profile_series.hdf5', results_dir)


Once you have the subtracted profile generated by ``set_sample_range`` you
can carry out analysis on the individual profile as in
:ref:`the scattering profile analysis tutorial <profile_analysis>`.


Applying a linear baseline correction
+++++++++++++++++++++++++++++++++++++++

.. code-block:: python

    import os
    import bioxtasraw.RAWAPI as raw

    #Load the series (load_series is given a list of files, take the first result)
    xyl_series = raw.load_series(['./series_data/xylanase.hdf5'])[0]

    #Find a buffer range and set it for the series
    buf_found, buf_start, buf_end = raw.find_buffer_range(xyl_series)

    buffer_results = raw.set_buffer_range(xyl_series, [[buf_start, buf_end]])
    (sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer, vpmw) = buffer_results

    #Validate a linear baseline defined by a start range and an end range
    lin_validation = raw.validate_baseline_range(xyl_series, [0, 10],
        [1132, 1142], 'Linear')
    (lin_valid, lin_valid_results, lin_similarity_results, lin_svd_results,
        lin_intI_results, lin_other_results) = lin_validation

    #Apply the linear baseline correction using the same two ranges
    lin_correction = raw.set_baseline_correction(xyl_series, [0, 10],
        [1132, 1142], 'Linear')
    (lin_bl_cor_profiles, lin_rg, lin_rger, lin_i0, lin_i0er, lin_vcmw,
        lin_vcmwer, lin_vpmw, lin_bl_corr, lin_fit_results) = lin_correction

    #Find and set a sample range, working on the baseline corrected profiles
    smp_found, smp_start, smp_end = raw.find_sample_range(xyl_series,
        profile_type='baseline')

    sub_profile = raw.set_sample_range(xyl_series, [[smp_start, smp_end]],
        profile_type='baseline')

    #Save the series, including the analysis done above, creating the
    #output directory first if it does not exist yet
    results_dir = './api_results'

    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    raw.save_series(xyl_series, 'xyl_series.hdf5', results_dir)

Note that setting a buffer range is only necessary if buffer subtraction has
not already been performed on the series.

Applying an integral baseline correction
+++++++++++++++++++++++++++++++++++++++++

.. code-block:: python

    import os
    import bioxtasraw.RAWAPI as raw

    #Load the series (load_series is given a list of files, take the first result)
    series = raw.load_series(['./series_data/baseline.hdf5'])[0]

    #Automatically find the start and end ranges for the baseline
    baseline_search = raw.find_baseline_range(series)
    start_found, end_found, start_range, end_range = baseline_search

    #Apply an integral baseline correction over the ranges that were found
    int_correction = raw.set_baseline_correction(series, start_range,
        end_range, 'Integral')
    (int_bl_cor_profiles, int_rg, int_rger, int_i0, int_i0er, int_vcmw,
        int_vcmwer, int_vpmw, int_bl_corr, int_fit_results) = int_correction

    #Find and set a sample range, working on the baseline corrected profiles
    smp_found, smp_start, smp_end = raw.find_sample_range(series,
        profile_type='baseline')

    sub_profile = raw.set_sample_range(series, [[smp_start, smp_end]],
        profile_type='baseline')

    #Save the series, including the analysis done above, creating the
    #output directory first if it does not exist yet
    results_dir = './api_results'

    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    raw.save_series(series, 'profile_series_bl.hdf5', results_dir)

Validating buffer and sample regions
+++++++++++++++++++++++++++++++++++++

You can validate whatever buffer or sample region you want to set. Note that
this validation is done as part of the ``find_buffer_range`` and
``find_sample_range`` functions, so there's no need to do it on regions found
with those functions.

.. code-block:: python

    import bioxtasraw.RAWAPI as raw

    #Load series
    xyl_series = raw.load_series(['./series_data/xylanase.hdf5'])[0]

    #Validate buffer region (given as a list of [start, end] ranges)
    buffer_range = [[180, 240], [500, 560]]

    (valid, similarity_results, svd_results,
        intI_results) = raw.validate_buffer_range(xyl_series, buffer_range)

    #If the chosen region is not valid, fall back to an automatically
    #determined region instead
    if not valid:
        success, start, end = raw.find_buffer_range(xyl_series)
        buffer_range = [[start, end]]

    #Set whichever buffer range was selected above. buffer_range is used
    #here because start and end only exist if the manual region was invalid.
    (sub_profiles, rg, rger, i0, i0er, vcmw, vcmwer,
        vpmw) = raw.set_buffer_range(xyl_series, buffer_range)

    #Validate sample region (given as a list of [start, end] ranges)
    sample_range = [[785, 815]]

    (valid, similarity_results, param_results, svd_results,
        sn_results) = raw.validate_sample_range(xyl_series, sample_range)

    #If the chosen region is not valid, fall back to an automatically
    #determined region instead
    if not valid:
        success, start_idx, end_idx = raw.find_sample_range(xyl_series)
        sample_range = [[start_idx, end_idx]]

    #Set whichever sample range was selected above
    sub_profile = raw.set_sample_range(xyl_series, sample_range)

Validating baseline regions
++++++++++++++++++++++++++++

You can validate baseline regions. Note that this validation is done as
part of the ``find_baseline_range`` function for an integral baseline, so it
is not necessary in that case. Also, the linear baseline validation is not
terribly useful at the moment, as it almost always returns invalid.

.. code-block:: python

    import bioxtasraw.RAWAPI as raw

    #Load the series (load_series is given a list of files, take the first result)
    series = raw.load_series(['./series_data/baseline.hdf5'])[0]

    #Validate a linear baseline defined by a start range and an end range
    linear_check = raw.validate_baseline_range(series, [0, 10], [953, 963],
        'Linear')
    (valid, valid_results, similarity_results, svd_results, intI_results,
        other_results) = linear_check

    #Validate an integral baseline. No baseline type is passed here, so the
    #call relies on the function's default baseline type. The results
    #replace those of the linear validation above.
    integral_check = raw.validate_baseline_range(series, [539, 568],
        [817, 846])
    (valid, valid_results, similarity_results, svd_results, intI_results,
        other_results) = integral_check

Carrying out SVD, EFA, and REGALS
++++++++++++++++++++++++++++++++++

You can carry out SVD, EFA, and REGALS from the API (though without the GUI
you have to know what the appropriate ranges are for each EFA component as input).

.. code-block:: python

    import bioxtasraw.RAWAPI as raw

    #Load the series (load_series is given a list of files, take the first result)
    phehc_series = raw.load_series(['./series_data/phehc_sec.hdf5'])[0]

    #Do SVD
    svd_s, svd_U, svd_V = raw.svd(phehc_series)

    #Do EFA, giving one [start, end] range per component
    efa_ranges = [[149, 197], [164, 321], [320, 364]]

    efa_results = raw.efa(phehc_series, efa_ranges)
    (efa_profiles, efa_converged, efa_conv_data,
        efa_rotation_data) = efa_results

    #Do REGALS. Each component is described by a pair of settings
    #dictionaries: one for its profile and one for its concentration.
    def simple_profile_settings():
        #Return a new dictionary on every call so that no two components
        #share the same settings object
        return {
            'type'          : 'simple',
            'lambda'        : 0.0,
            'auto_lambda'   : True,
            'kwargs'        : {},
            }

    def smooth_conc_settings(lam, auto_lambda, xmin, xmax):
        #Only lambda, auto_lambda, xmin and xmax differ between the
        #components in this example, the remaining values are shared
        return {
            'type'          : 'smooth',
            'lambda'        : lam,
            'auto_lambda'   : auto_lambda,
            'kwargs'        : {
                'xmin'              : xmin,
                'xmax'              : xmax,
                'Nw'                : 50,
                'is_zero_at_xmin'   : True,
                'is_zero_at_xmax'   : True,
                },
            }

    #Component 1
    prof1_settings = simple_profile_settings()
    conc1_settings = smooth_conc_settings(lam=1.0, auto_lambda=True,
        xmin=145, xmax=195)

    #Component 2, the only one with a fixed (not automatic) lambda
    prof2_settings = simple_profile_settings()
    conc2_settings = smooth_conc_settings(lam=3.0e3, auto_lambda=False,
        xmin=160, xmax=325)

    #Component 3
    prof3_settings = simple_profile_settings()
    conc3_settings = smooth_conc_settings(lam=1.0, auto_lambda=True,
        xmin=320, xmax=383)

    comp_settings = [
        (prof1_settings, conc1_settings),
        (prof2_settings, conc2_settings),
        (prof3_settings, conc3_settings),
        ]

    regals_results = raw.regals(phehc_series, comp_settings)
    (regals_profiles, regals_ifts, concs, reg_concs, mixture, params,
        residual) = regals_results
