NAME¶
slurm_step_launch_params_t_init, slurm_step_launch,
  slurm_step_launch_fwd_signal, slurm_step_launch_wait_start,
  slurm_step_launch_wait_finish, slurm_step_launch_abort - Slurm job step launch
  functions
SYNTAX¶
#include <slurm/slurm.h>
void 
slurm_step_launch_params_t_init (
 
	slurm_step_launch_params_t * 
launch_req
 
);
int 
slurm_step_launch (
 
	slurm_step_ctx 
ctx,
 
	const slurm_step_launch_params_t * 
launch_req,
 
	const slurm_step_launch_callbacks_t 
callbacks
 
);
void 
slurm_step_launch_fwd_signal (
 
	slurm_step_ctx 
ctx,
 
	int 
signo
 
);
int 
slurm_step_launch_wait_start (
 
	slurm_step_ctx 
ctx
 
);
void 
slurm_step_launch_wait_finish (
 
	slurm_step_ctx 
ctx
 
);
void 
slurm_step_launch_abort (
 
	slurm_step_ctx 
ctx
 
);
ARGUMENTS¶
  - callbacks
 
  - Identify functions to be called when various events occur.
 
  - ctx
 
  - Job step context. Created by slurm_step_ctx_create, used in
      subsequent function calls, and destroyed by
    slurm_step_ctx_destroy.
 
  - launch_req
 
  - Pointer to a structure allocated by the user containing specifications of
      the job step to be launched.
    
  
 
DESCRIPTION¶
slurm_step_launch_params_t_init Initialize a user-allocated
  slurm_step_launch_params_t structure with default values. This
  function will NOT allocate any new memory.
slurm_step_launch Launch a parallel job step.
slurm_step_launch_fwd_signal Forward a signal to all those nodes with
  running tasks.
slurm_step_launch_wait_start Block until all tasks have started.
slurm_step_launch_wait_finish Block until all tasks have finished (or
  failed to start altogether).
slurm_step_launch_abort Abort an in-progress launch, or terminate the
  fully launched job step. Can be called from a signal handler.
IO Redirection¶
Use the 
local_fds entry in 
slurm_step_launch_params_t to specify
  file descriptors to be used for standard input, output and error. Any
  
local_fds not specified will result in the launched tasks using the
  calling process's standard input, output and error. Threads created by
  
slurm_step_launch will completely handle copying data between the
  remote processes and the specified local file descriptors.
Use the substructure in 
slurm_step_io_fds_t to restrict the redirection
  of I/O to a specific node or task ID. For example, to redirect standard output
  only from task 0, set
params.local_fds.out.taskid=0;
Use the 
remote_*_filename fields in 
slurm_step_launch_params_t to
  have launched tasks read and/or write directly to local files rather than
  transferring data over the network to the calling process. These strings
  support many of the same format options as the 
srun command. Any
  
remote_*_filename fields set will supersede the corresponding
  
local_fds entries. For example, the following code will direct each
  task to write standard output and standard error to local files with names
  containing the task ID (e.g. "/home/bob/test_output/run1.out.0" and
  "/home/bob/test_output/run1.err.0" for task 0).
params.remote_output_filename = "/home/bob/test_output/run1.out.%t";
params.remote_error_filename  = "/home/bob/test_output/run1.err.%t";
RETURN VALUE¶
slurm_step_launch and 
slurm_step_launch_wait_start will return
  SLURM_SUCCESS when all tasks have successfully started, or SLURM_ERROR if the
  job step is aborted during launch.
ERRORS¶
EINVAL Invalid argument
SLURM_PROTOCOL_VERSION_ERROR Protocol version has changed, re-link your
  code.
ESLURM_INVALID_JOB_ID the requested job id does not exist.
ESLURM_ALREADY_DONE the specified job has already completed and can not
  be modified.
ESLURM_ACCESS_DENIED the requesting user lacks authorization for the
  requested action (e.g. trying to delete or modify another user's job).
ESLURM_INTERCONNECT_FAILURE failed to configure the node interconnect.
ESLURM_BAD_DIST task distribution specification is invalid.
SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT Timeout in communicating with SLURM
  controller.
EXAMPLE¶
/*
 * To compile:
 * gcc test.c -o test -g -pthread -lslurm
 *
 * Or if Slurm is not in your default search paths:
 * gcc test.c -o test -g -pthread -I${SLURM_DIR}/include \
 *     -Wl,--rpath=${SLURM_DIR}/lib -L${SLURM_DIR}/lib -lslurm
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>
/*
 * Task-start callback (installed as callbacks.task_start in main()).
 * Prints the count_of_pids and node_name fields of the launch response
 * message to standard output.
 */
static void _task_start(launch_tasks_response_msg_t *msg)
{
	fprintf(stdout, "%d tasks started on node %s\n", msg->count_of_pids,
		msg->node_name);
}
/*
 * Task-finish callback (installed as callbacks.task_finish in main()).
 * Prints the num_tasks field of the task exit message to standard output.
 */
static void _task_finish(task_exit_msg_t *msg)
{
	fprintf(stdout, "%d tasks finished\n", msg->num_tasks);
}
/*
 * Example driver: create a job step context requesting one node and four
 * tasks, launch the command given on the command line (argv[1..]) in that
 * step, then block until the tasks have started and finished.
 *
 * Exit status: 0 once the step has finished; 1 if the step context cannot
 * be created or the launch request fails.
 */
int main (int argc, char *argv[])
{
	slurm_step_ctx_params_t step_params;
	slurm_step_ctx step_ctx;
	slurm_step_launch_params_t params;
	slurm_step_launch_callbacks_t callbacks;
	uint32_t job_id, step_id;

	/* Describe the step: start from defaults, then override. */
	slurm_step_ctx_params_t_init(&step_params);
	step_params.node_count = 1;
	step_params.task_count = 4;
	step_params.overcommit = true;

	step_ctx = slurm_step_ctx_create(&step_params);
	if (step_ctx == NULL) {
		slurm_perror("slurm_step_ctx_create");
		exit(1);
	}

	slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_JOBID, &job_id);
	slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_STEPID, &step_id);
	printf("Ready to start job %u step %u\n", job_id, step_id);

	/* Launch everything after the program name as the task command. */
	slurm_step_launch_params_t_init(&params);
	params.argc = argc - 1;
	params.argv = argv + 1;

	/*
	 * Clear the whole callback table first so that any member not
	 * assigned below is NULL rather than an indeterminate stack value.
	 */
	memset(&callbacks, 0, sizeof(callbacks));
	callbacks.task_start = _task_start;
	callbacks.task_finish = _task_finish;

	/*
	 * NOTE(review): the SYNTAX section of this page lists three
	 * arguments (ctx, launch_req, callbacks); the NULL second argument
	 * is kept from the original example -- confirm against the
	 * prototype in the installed <slurm/slurm.h>.
	 */
	if (slurm_step_launch(step_ctx, NULL, &params, &callbacks)
			!= SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch");
		exit(1);
	}
	printf("Sent step launch RPC\n");

	if (slurm_step_launch_wait_start(step_ctx) != SLURM_SUCCESS) {
		fprintf(stderr, "job step was aborted during launch\n");
	} else {
		printf("All tasks have started\n");
	}

	slurm_step_launch_wait_finish(step_ctx);
	printf("All tasks have finished\n");

	slurm_step_ctx_destroy(step_ctx);
	exit(0);
}
NOTE¶
These functions are included in the libslurm library, which must be linked to
  your process for use (e.g. "cc myprog.c -lslurm").
COPYING¶
Copyright (C) 2006-2007 The Regents of the University of California. Copyright
  (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore
  National Laboratory (cf, DISCLAIMER). CODE-OCEC-09-009. All rights reserved.
This file is part of SLURM, a resource management program. For details, see
  <
http://slurm.schedmd.com/>.
SLURM is free software; you can redistribute it and/or modify it under the terms
  of the GNU General Public License as published by the Free Software
  Foundation; either version 2 of the License, or (at your option) any later
  version.
SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  A PARTICULAR PURPOSE. See the GNU General Public License for more details.
SEE ALSO¶
slurm_step_ctx_create(3), 
slurm_step_ctx_destroy(3),
  
slurm_get_errno(3), 
slurm_perror(3), 
slurm_strerror(3),
  
salloc(1), 
srun(1)