MySQL 5.6.14 Source Code Document
ha_ndbcluster.cc
1 /* Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
24 #include "ha_ndbcluster_glue.h"
25 
26 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
27 #include "ha_ndbcluster.h"
28 #include <ndbapi/NdbApi.hpp>
29 #include <util/Bitmask.hpp>
30 #include <ndbapi/NdbIndexStat.hpp>
31 #include <ndbapi/NdbInterpretedCode.hpp>
32 #include "../storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
33 #include "../storage/ndb/src/ndbapi/NdbQueryOperation.hpp"
34 
35 #include "ha_ndbcluster_binlog.h"
36 #include "ha_ndbcluster_push.h"
37 #include "ha_ndbcluster_cond.h"
38 #include "ha_ndbcluster_tables.h"
39 #include "ha_ndbcluster_connection.h"
40 #include "ndb_thd.h"
41 #include "ndb_table_guard.h"
42 #include "ndb_global_schema_lock.h"
43 #include "ndb_global_schema_lock_guard.h"
44 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
45 #include "abstract_query_plan.h"
46 #endif
47 #include "ndb_dist_priv_util.h"
48 #include "ha_ndb_index_stat.h"
49 
50 #include <mysql/plugin.h>
51 #include <ndb_version.h>
52 #include "ndb_mi.h"
53 
54 // ndb interface initialization/cleanup
55 extern "C" void ndb_init_internal();
56 extern "C" void ndb_end_internal();
57 
58 static const int DEFAULT_PARALLELISM= 0;
59 static const ha_rows DEFAULT_AUTO_PREFETCH= 32;
60 static const ulong ONE_YEAR_IN_SECONDS= (ulong) 3600L*24L*365L;
61 
62 ulong opt_ndb_extra_logging;
63 static ulong opt_ndb_wait_connected;
64 ulong opt_ndb_wait_setup;
65 static ulong opt_ndb_cache_check_time;
66 static uint opt_ndb_cluster_connection_pool;
67 static char* opt_ndb_index_stat_option;
68 static char* opt_ndb_connectstring;
69 static uint opt_ndb_nodeid;
70 
71 static MYSQL_THDVAR_UINT(
72  autoincrement_prefetch_sz, /* name */
73  PLUGIN_VAR_RQCMDARG,
74  "Specify number of autoincrement values that are prefetched.",
75  NULL, /* check func. */
76  NULL, /* update func. */
77  1, /* default */
78  1, /* min */
79  65535, /* max */
80  0 /* block */
81 );
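/*
  Illustrative sketch (hypothetical helper, not part of the original file):
  a per-session variable defined with MYSQL_THDVAR_UINT is read back with
  THDVAR(). Passing a THD* yields that session's value; passing NULL yields
  the global default, as done elsewhere in this file for index_stat_enable.
*/
static uint get_autoinc_prefetch_sz(THD *thd)
{
  return THDVAR(thd, autoincrement_prefetch_sz);
}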
82 
83 
84 static MYSQL_THDVAR_BOOL(
85  force_send, /* name */
86  PLUGIN_VAR_OPCMDARG,
87  "Force send of buffers to ndb immediately without waiting for "
88  "other threads.",
89  NULL, /* check func. */
90  NULL, /* update func. */
91  1 /* default */
92 );
93 
94 
95 static MYSQL_THDVAR_BOOL(
96  use_exact_count, /* name */
97  PLUGIN_VAR_OPCMDARG,
98  "Use exact records count during query planning and for fast "
99  "select count(*), disable for faster queries.",
100  NULL, /* check func. */
101  NULL, /* update func. */
102  0 /* default */
103 );
104 
105 
106 static MYSQL_THDVAR_BOOL(
107  use_transactions, /* name */
108  PLUGIN_VAR_OPCMDARG,
109  "Use transactions for large inserts, if enabled then large "
110  "inserts will be split into several smaller transactions",
111  NULL, /* check func. */
112  NULL, /* update func. */
113  1 /* default */
114 );
115 
116 
117 static MYSQL_THDVAR_BOOL(
118  use_copying_alter_table, /* name */
119  PLUGIN_VAR_OPCMDARG,
120  "Force ndbcluster to always copy tables at alter table (should "
121  "only be used if on-line alter table fails).",
122  NULL, /* check func. */
123  NULL, /* update func. */
124  0 /* default */
125 );
126 
127 
128 static MYSQL_THDVAR_UINT(
129  optimized_node_selection, /* name */
130  PLUGIN_VAR_OPCMDARG,
131  "Select nodes for transactions in a more optimal way.",
132  NULL, /* check func. */
133  NULL, /* update func. */
134  3, /* default */
135  0, /* min */
136  3, /* max */
137  0 /* block */
138 );
139 
140 
141 static MYSQL_THDVAR_ULONG(
142  batch_size, /* name */
143  PLUGIN_VAR_RQCMDARG,
144  "Batch size in bytes.",
145  NULL, /* check func. */
146  NULL, /* update func. */
147  32768, /* default */
148  0, /* min */
149  ONE_YEAR_IN_SECONDS, /* max */
150  0 /* block */
151 );
152 
153 
154 static MYSQL_THDVAR_ULONG(
155  optimization_delay, /* name */
156  PLUGIN_VAR_RQCMDARG,
157  "For optimize table, specifies the delay in milliseconds "
158  "for each batch of rows sent.",
159  NULL, /* check func. */
160  NULL, /* update func. */
161  10, /* default */
162  0, /* min */
163  100000, /* max */
164  0 /* block */
165 );
166 
167 #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
168 #define DEFAULT_NDB_INDEX_STAT_ENABLE FALSE
169 #else
170 #define DEFAULT_NDB_INDEX_STAT_ENABLE TRUE
171 #endif
172 
173 static MYSQL_THDVAR_BOOL(
174  index_stat_enable, /* name */
175  PLUGIN_VAR_OPCMDARG,
176  "Use ndb index statistics in query optimization.",
177  NULL, /* check func. */
178  NULL, /* update func. */
179  DEFAULT_NDB_INDEX_STAT_ENABLE /* default */
180 );
181 
182 
183 static MYSQL_THDVAR_ULONG(
184  index_stat_cache_entries, /* name */
185  PLUGIN_VAR_NOCMDARG,
186  "Obsolete (ignored and will be removed later).",
187  NULL, /* check func. */
188  NULL, /* update func. */
189  32, /* default */
190  0, /* min */
191  ULONG_MAX, /* max */
192  0 /* block */
193 );
194 
195 
196 static MYSQL_THDVAR_ULONG(
197  index_stat_update_freq, /* name */
198  PLUGIN_VAR_NOCMDARG,
199  "Obsolete (ignored and will be removed later).",
200  NULL, /* check func. */
201  NULL, /* update func. */
202  20, /* default */
203  0, /* min */
204  ULONG_MAX, /* max */
205  0 /* block */
206 );
207 
208 
209 static MYSQL_THDVAR_BOOL(
210  table_no_logging, /* name */
211  PLUGIN_VAR_NOCMDARG,
212  "",
213  NULL, /* check func. */
214  NULL, /* update func. */
215  FALSE /* default */
216 );
217 
218 
219 static MYSQL_THDVAR_BOOL(
220  table_temporary, /* name */
221  PLUGIN_VAR_NOCMDARG,
222  "",
223  NULL, /* check func. */
224  NULL, /* update func. */
225  FALSE /* default */
226 );
227 
228 static MYSQL_THDVAR_UINT(
229  blob_read_batch_bytes, /* name */
230  PLUGIN_VAR_RQCMDARG,
231  "Specifies the bytesize large Blob reads "
232  "should be batched into. 0 == No limit.",
233  NULL, /* check func */
234  NULL, /* update func */
235  65536, /* default */
236  0, /* min */
237  UINT_MAX, /* max */
238  0 /* block */
239 );
240 
241 static MYSQL_THDVAR_UINT(
242  blob_write_batch_bytes, /* name */
243  PLUGIN_VAR_RQCMDARG,
244  "Specifies the bytesize large Blob writes "
245  "should be batched into. 0 == No limit.",
246  NULL, /* check func */
247  NULL, /* update func */
248  65536, /* default */
249  0, /* min */
250  UINT_MAX, /* max */
251  0 /* block */
252 );
253 
254 static MYSQL_THDVAR_UINT(
255  deferred_constraints, /* name */
256  PLUGIN_VAR_RQCMDARG,
257  "Specified that constraints should be checked deferred (when supported)",
258  NULL, /* check func */
259  NULL, /* update func */
260  0, /* default */
261  0, /* min */
262  1, /* max */
263  0 /* block */
264 );
265 
266 #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
267 #define DEFAULT_NDB_JOIN_PUSHDOWN FALSE
268 #else
269 #define DEFAULT_NDB_JOIN_PUSHDOWN TRUE
270 #endif
271 
272 static MYSQL_THDVAR_BOOL(
273  join_pushdown, /* name */
274  PLUGIN_VAR_OPCMDARG,
275  "Enable pushing down of join to datanodes",
276  NULL, /* check func. */
277  NULL, /* update func. */
278  DEFAULT_NDB_JOIN_PUSHDOWN /* default */
279 );
280 
281 /*
282  Required in index_stat.cc but available only from here
283  thanks to use of top level anonymous structs.
284 */
285 bool ndb_index_stat_get_enable(THD *thd)
286 {
287  const bool value = THDVAR(thd, index_stat_enable);
288  return value;
289 }
290 
291 static int ndbcluster_end(handlerton *hton, ha_panic_function flag);
292 static bool ndbcluster_show_status(handlerton *hton, THD*,
293  stat_print_fn *,
294  enum ha_stat_type);
295 static int ndbcluster_alter_tablespace(handlerton *hton,
296  THD* thd,
297  st_alter_tablespace *info);
298 static int ndbcluster_fill_files_table(handlerton *hton,
299  THD *thd,
300  TABLE_LIST *tables,
301  Item *cond);
302 
303 #if MYSQL_VERSION_ID >= 50501
304 
315 static int
316 ndbcluster_fill_is_table(handlerton *hton, THD *thd, TABLE_LIST *tables,
317  Item *cond, enum enum_schema_tables schema_table_idx)
318 {
319  if (schema_table_idx == SCH_FILES)
320  return ndbcluster_fill_files_table(hton, thd, tables, cond);
321  return 0;
322 }
323 #endif
324 
325 
326 handlerton *ndbcluster_hton;
327 
328 static handler *ndbcluster_create_handler(handlerton *hton,
 329  TABLE_SHARE *table,
 330  MEM_ROOT *mem_root)
331 {
332  return new (mem_root) ha_ndbcluster(hton, table);
333 }
334 
335 static uint
336 ndbcluster_partition_flags()
337 {
338  return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY |
339  HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
340 }
341 
342 #ifndef NDB_WITHOUT_ONLINE_ALTER
343 static uint
344 ndbcluster_alter_table_flags(uint flags)
345 {
346  if (flags & ALTER_DROP_PARTITION)
347  return 0;
348  else
349  return (HA_PARTITION_FUNCTION_SUPPORTED);
350 }
351 #else
352 static uint
353 ndbcluster_alter_table_flags(uint flags)
354 {
355  const uint f=
356  HA_PARTITION_FUNCTION_SUPPORTED |
357  0;
358 
359  if (flags & Alter_info::ALTER_DROP_PARTITION)
360  return 0;
361 
362  return f;
363 }
364 #endif
365 
366 #define NDB_AUTO_INCREMENT_RETRIES 100
367 #define BATCH_FLUSH_SIZE (32768)
368 /*
 369  Room for 10 instruction words, two labels (@ 2 words/label)
370  + 2 extra words for the case of resolve_size == 8
371 */
372 #define MAX_CONFLICT_INTERPRETED_PROG_SIZE 16
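/*
  The arithmetic behind the limit above: 10 instruction words
  + 2 labels * 2 words/label + 2 extra words (resolve_size == 8 case)
  = 16 words.
*/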
373 
374 static int ndb_to_mysql_error(const NdbError *ndberr);
375 
376 #define ERR_PRINT(err) \
377  DBUG_PRINT("error", ("%d message: %s", err.code, err.message))
378 
379 #define ERR_RETURN(err) \
380 { \
381  const NdbError& tmp= err; \
382  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
383 }
384 
385 #define ERR_BREAK(err, code) \
386 { \
387  const NdbError& tmp= err; \
388  code= ndb_to_mysql_error(&tmp); \
389  break; \
390 }
391 
392 #define ERR_SET(err, code) \
393 { \
394  const NdbError& tmp= err; \
395  code= ndb_to_mysql_error(&tmp); \
396 }
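/*
  Illustrative use of the macros above (hypothetical function, a sketch
  only): ERR_RETURN assumes an enclosing DBUG_ENTER() scope, since it
  expands to a DBUG_RETURN of the mapped mysql error code.
*/
static int example_ndb_call(NdbTransaction *trans)
{
  DBUG_ENTER("example_ndb_call");
  if (trans->execute(NdbTransaction::NoCommit) != 0)
    ERR_RETURN(trans->getNdbError());
  DBUG_RETURN(0);
}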
397 
398 static int ndbcluster_inited= 0;
399 int ndbcluster_terminating= 0;
400 
401 /*
402  Indicator and CONDVAR used to delay client and slave
403  connections until Ndb has Binlog setup
404  (bug#46955)
405 */
406 int ndb_setup_complete= 0;
407 pthread_cond_t COND_ndb_setup_complete; // Signal with ndbcluster_mutex
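/*
  A minimal sketch of the wait protocol implied above (the actual wait loop
  lives elsewhere): waiters hold ndbcluster_mutex and block on the condvar
  until binlog setup marks completion.

    pthread_mutex_lock(&ndbcluster_mutex);
    while (!ndb_setup_complete)
      pthread_cond_wait(&COND_ndb_setup_complete, &ndbcluster_mutex);
    pthread_mutex_unlock(&ndbcluster_mutex);
*/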
408 
409 extern Ndb* g_ndb;
410 
411 uchar g_node_id_map[max_ndb_nodes];
412 
414 pthread_mutex_t ndbcluster_mutex;
415 
417 HASH ndbcluster_open_tables;
418 
419 static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
420  my_bool not_used __attribute__((unused)));
421 
422 static void modify_shared_stats(NDB_SHARE *share,
423  Ndb_local_table_statistics *local_stat);
424 
425 static int ndb_get_table_statistics(THD *thd, ha_ndbcluster*, bool, Ndb*,
426  const NdbRecord *, struct Ndb_statistics *,
427  bool have_lock= FALSE,
428  uint part_id= ~(uint)0);
429 
430 THD *injector_thd= 0;
431 
432 // Util thread variables
433 pthread_t ndb_util_thread;
434 int ndb_util_thread_running= 0;
435 pthread_mutex_t LOCK_ndb_util_thread;
436 pthread_cond_t COND_ndb_util_thread;
437 pthread_cond_t COND_ndb_util_ready;
438 pthread_handler_t ndb_util_thread_func(void *arg);
439 
440 // Index stats thread variables
441 pthread_t ndb_index_stat_thread;
442 int ndb_index_stat_thread_running= 0;
443 pthread_mutex_t LOCK_ndb_index_stat_thread;
444 pthread_cond_t COND_ndb_index_stat_thread;
445 pthread_cond_t COND_ndb_index_stat_ready;
446 pthread_mutex_t ndb_index_stat_list_mutex;
447 pthread_mutex_t ndb_index_stat_stat_mutex;
448 pthread_cond_t ndb_index_stat_stat_cond;
449 pthread_handler_t ndb_index_stat_thread_func(void *arg);
450 
451 extern void ndb_index_stat_free(NDB_SHARE *share);
452 extern void ndb_index_stat_end();
453 
 454 /* Status variables shown with SHOW STATUS LIKE 'Ndb%' */
455 
456 struct st_ndb_status g_ndb_status;
457 
458 long g_ndb_status_index_stat_cache_query = 0;
459 long g_ndb_status_index_stat_cache_clean = 0;
460 
461 long long g_event_data_count = 0;
462 long long g_event_nondata_count = 0;
463 long long g_event_bytes_count = 0;
464 
465 static long long g_slave_api_client_stats[Ndb::NumClientStatistics];
466 
467 static long long g_server_api_client_stats[Ndb::NumClientStatistics];
468 
469 void
470 update_slave_api_stats(Ndb* ndb)
471 {
472  for (Uint32 i=0; i < Ndb::NumClientStatistics; i++)
473  g_slave_api_client_stats[i] = ndb->getClientStat(i);
474 }
475 
476 st_ndb_slave_state g_ndb_slave_state;
477 
478 st_ndb_slave_state::st_ndb_slave_state()
479  : current_conflict_defined_op_count(0),
480  current_master_server_epoch(0),
481  current_max_rep_epoch(0),
482  max_rep_epoch(0),
483  sql_run_id(~Uint32(0))
484 {
485  memset(current_violation_count, 0, sizeof(current_violation_count));
486  memset(total_violation_count, 0, sizeof(total_violation_count));
487 };
488 
489 void
490 st_ndb_slave_state::atTransactionAbort()
491 {
492  /* Reset current-transaction counters + state */
493  memset(current_violation_count, 0, sizeof(current_violation_count));
494  current_conflict_defined_op_count = 0;
495  current_max_rep_epoch = 0;
496 }
497 
498 void
499 st_ndb_slave_state::atTransactionCommit()
500 {
501  /* Merge committed transaction counters into total state
502  * Then reset current transaction counters
503  */
504  for (int i=0; i < CFT_NUMBER_OF_CFTS; i++)
505  {
506  total_violation_count[i]+= current_violation_count[i];
507  current_violation_count[i] = 0;
508  }
509  current_conflict_defined_op_count = 0;
510  if (current_max_rep_epoch > max_rep_epoch)
511  {
512  DBUG_PRINT("info", ("Max replicated epoch increases from %llu to %llu",
513  max_rep_epoch,
514  current_max_rep_epoch));
515 
516  max_rep_epoch = current_max_rep_epoch;
517  }
518  current_max_rep_epoch = 0;
519 }
520 
521 void
522 st_ndb_slave_state::atApplyStatusWrite(Uint32 master_server_id,
523  Uint32 row_server_id,
524  Uint64 row_epoch,
525  bool is_row_server_id_local)
526 {
527  if (row_server_id == master_server_id)
528  {
529  /*
530  WRITE_ROW to ndb_apply_status injected by MySQLD
531  immediately upstream of us.
532  Record epoch
533  */
534  current_master_server_epoch = row_epoch;
535  assert(! is_row_server_id_local);
536  }
537  else if (is_row_server_id_local)
538  {
539  DBUG_PRINT("info", ("Recording application of local server %u epoch %llu "
540  " which is %s.",
541  row_server_id, row_epoch,
542  (row_epoch > g_ndb_slave_state.current_max_rep_epoch)?
543  " new highest." : " older than previously applied"));
544  if (row_epoch > current_max_rep_epoch)
545  {
546  /*
547  Store new highest epoch in thdvar. If we commit successfully
548  then this can become the new global max
549  */
550  current_max_rep_epoch = row_epoch;
551  }
552  }
553 }
554 
555 void
556 st_ndb_slave_state::atResetSlave()
557 {
558  /* Reset the Maximum replicated epoch vars
559  * on slave reset
560  * No need to touch the sql_run_id as that
561  * will increment if the slave is started
562  * again.
563  */
564  current_max_rep_epoch = 0;
565  max_rep_epoch = 0;
566 }
567 
568 static int check_slave_state(THD* thd)
569 {
570  DBUG_ENTER("check_slave_state");
571 
572 #ifdef HAVE_NDB_BINLOG
573  if (!thd->slave_thread)
574  DBUG_RETURN(0);
575 
576  const Uint32 runId = ndb_mi_get_slave_run_id();
577  DBUG_PRINT("info", ("Slave SQL thread run id is %u",
578  runId));
579  if (unlikely(runId != g_ndb_slave_state.sql_run_id))
580  {
581  DBUG_PRINT("info", ("Slave run id changed from %u, "
582  "treating as Slave restart",
583  g_ndb_slave_state.sql_run_id));
584  g_ndb_slave_state.sql_run_id = runId;
585 
586  /* Always try to load the Max Replicated Epoch info
587  * first.
588  * Could be made optional if it's a problem
589  */
590  {
591  /*
592  Load highest replicated epoch from a local
593  MySQLD from the cluster.
594  */
595  DBUG_PRINT("info", ("Loading applied epoch information from %s",
596  NDB_APPLY_TABLE));
597  NdbError ndb_error;
598  Uint64 highestAppliedEpoch = 0;
599  do
600  {
601  Ndb* ndb= check_ndb_in_thd(thd);
602  NDBDICT* dict= ndb->getDictionary();
603  NdbTransaction* trans= NULL;
604  ndb->setDatabaseName(NDB_REP_DB);
605  Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
606 
607  const NDBTAB* ndbtab= ndbtab_g.get_table();
608  if (unlikely(ndbtab == NULL))
609  {
610  ndb_error = dict->getNdbError();
611  break;
612  }
613 
614  trans= ndb->startTransaction();
615  if (unlikely(trans == NULL))
616  {
617  ndb_error = ndb->getNdbError();
618  break;
619  }
620 
621  do
622  {
623  NdbScanOperation* sop = trans->getNdbScanOperation(ndbtab);
624  if (unlikely(sop == NULL))
625  {
626  ndb_error = trans->getNdbError();
627  break;
628  }
629 
630  const Uint32 server_id_col_num = 0;
631  const Uint32 epoch_col_num = 1;
632  NdbRecAttr* server_id_ra = 0;
633  NdbRecAttr* epoch_ra = 0;
634 
635  if (unlikely((sop->readTuples(NdbOperation::LM_CommittedRead) != 0) ||
636  ((server_id_ra = sop->getValue(server_id_col_num)) == NULL) ||
637  ((epoch_ra = sop->getValue(epoch_col_num)) == NULL)))
638  {
639  ndb_error = sop->getNdbError();
640  break;
641  }
642 
643  if (trans->execute(NdbTransaction::Commit))
644  {
645  ndb_error = trans->getNdbError();
646  break;
647  }
648 
649  int rc = 0;
650  while (0 == (rc= sop->nextResult(true)))
651  {
652  Uint32 serverid = server_id_ra->u_32_value();
653  Uint64 epoch = epoch_ra->u_64_value();
654 
655  if ((serverid == ::server_id) ||
656  (ndb_mi_get_ignore_server_id(serverid)))
657  {
658  highestAppliedEpoch = MAX(epoch, highestAppliedEpoch);
659  }
660  }
661 
662  if (rc != 1)
663  {
664  ndb_error = sop->getNdbError();
665  break;
666  }
667  } while (0);
668 
669  trans->close();
670  } while(0);
671 
672  if (ndb_error.code != 0)
673  {
674  sql_print_warning("NDB Slave : Could not determine maximum replicated epoch from %s.%s "
675  "at Slave start, error %u %s",
676  NDB_REP_DB,
677  NDB_APPLY_TABLE,
678  ndb_error.code, ndb_error.message);
679  }
680 
681  /*
682  Set Global status variable to the Highest Applied Epoch from
683  the Cluster DB.
684  If none was found, this will be zero.
685  */
686  g_ndb_slave_state.max_rep_epoch = highestAppliedEpoch;
687  sql_print_information("NDB Slave : MaxReplicatedEpoch set to %llu (%u/%u) at Slave start",
688  g_ndb_slave_state.max_rep_epoch,
689  (Uint32)(g_ndb_slave_state.max_rep_epoch >> 32),
690  (Uint32)(g_ndb_slave_state.max_rep_epoch & 0xffffffff));
691  } // Load highest replicated epoch
692  } // New Slave SQL thread run id
693 #endif
694 
695  DBUG_RETURN(0);
696 }
697 
698 
699 static int update_status_variables(Thd_ndb *thd_ndb,
700  st_ndb_status *ns,
 701  Ndb_cluster_connection *c)
 702 {
703  ns->connected_port= c->get_connected_port();
704  ns->connected_host= c->get_connected_host();
705  if (ns->cluster_node_id != (int) c->node_id())
706  {
707  ns->cluster_node_id= c->node_id();
708  if (&g_ndb_status == ns && g_ndb_cluster_connection == c)
709  sql_print_information("NDB: NodeID is %lu, management server '%s:%lu'",
710  ns->cluster_node_id, ns->connected_host,
711  ns->connected_port);
712  }
713  ns->number_of_replicas= 0;
714  {
715  int n= c->get_no_ready();
716  ns->number_of_ready_data_nodes= n > 0 ? n : 0;
717  }
718  ns->number_of_data_nodes= c->no_db_nodes();
719  ns->connect_count= c->get_connect_count();
720  if (thd_ndb)
721  {
722  ns->execute_count= thd_ndb->m_execute_count;
723  ns->scan_count= thd_ndb->m_scan_count;
724  ns->pruned_scan_count= thd_ndb->m_pruned_scan_count;
725  ns->sorted_scan_count= thd_ndb->m_sorted_scan_count;
726  ns->pushed_queries_defined= thd_ndb->m_pushed_queries_defined;
727  ns->pushed_queries_dropped= thd_ndb->m_pushed_queries_dropped;
728  ns->pushed_queries_executed= thd_ndb->m_pushed_queries_executed;
729  ns->pushed_reads= thd_ndb->m_pushed_reads;
730  for (int i= 0; i < MAX_NDB_NODES; i++)
731  {
732  ns->transaction_no_hint_count[i]= thd_ndb->m_transaction_no_hint_count[i];
733  ns->transaction_hint_count[i]= thd_ndb->m_transaction_hint_count[i];
734  }
735  for (int i=0; i < Ndb::NumClientStatistics; i++)
736  {
737  ns->api_client_stats[i] = thd_ndb->ndb->getClientStat(i);
738  }
739  ns->schema_locks_count= thd_ndb->schema_locks_count;
740  }
741  return 0;
742 }
743 
744 /* Helper macro for definitions of NdbApi status variables */
745 
746 #define NDBAPI_COUNTERS(NAME_SUFFIX, ARRAY_LOCATION) \
747  {"api_wait_exec_complete_count" NAME_SUFFIX, \
748  (char*) ARRAY_LOCATION[ Ndb::WaitExecCompleteCount ], \
749  SHOW_LONGLONG}, \
750  {"api_wait_scan_result_count" NAME_SUFFIX, \
751  (char*) ARRAY_LOCATION[ Ndb::WaitScanResultCount ], \
752  SHOW_LONGLONG}, \
753  {"api_wait_meta_request_count" NAME_SUFFIX, \
754  (char*) ARRAY_LOCATION[ Ndb::WaitMetaRequestCount ], \
755  SHOW_LONGLONG}, \
756  {"api_wait_nanos_count" NAME_SUFFIX, \
757  (char*) ARRAY_LOCATION[ Ndb::WaitNanosCount ], \
758  SHOW_LONGLONG}, \
759  {"api_bytes_sent_count" NAME_SUFFIX, \
760  (char*) ARRAY_LOCATION[ Ndb::BytesSentCount ], \
761  SHOW_LONGLONG}, \
762  {"api_bytes_received_count" NAME_SUFFIX, \
763  (char*) ARRAY_LOCATION[ Ndb::BytesRecvdCount ], \
764  SHOW_LONGLONG}, \
765  {"api_trans_start_count" NAME_SUFFIX, \
766  (char*) ARRAY_LOCATION[ Ndb::TransStartCount ], \
767  SHOW_LONGLONG}, \
768  {"api_trans_commit_count" NAME_SUFFIX, \
769  (char*) ARRAY_LOCATION[ Ndb::TransCommitCount ], \
770  SHOW_LONGLONG}, \
771  {"api_trans_abort_count" NAME_SUFFIX, \
772  (char*) ARRAY_LOCATION[ Ndb::TransAbortCount ], \
773  SHOW_LONGLONG}, \
774  {"api_trans_close_count" NAME_SUFFIX, \
775  (char*) ARRAY_LOCATION[ Ndb::TransCloseCount ], \
776  SHOW_LONGLONG}, \
777  {"api_pk_op_count" NAME_SUFFIX, \
778  (char*) ARRAY_LOCATION[ Ndb::PkOpCount ], \
779  SHOW_LONGLONG}, \
780  {"api_uk_op_count" NAME_SUFFIX, \
781  (char*) ARRAY_LOCATION[ Ndb::UkOpCount ], \
782  SHOW_LONGLONG}, \
783  {"api_table_scan_count" NAME_SUFFIX, \
784  (char*) ARRAY_LOCATION[ Ndb::TableScanCount ], \
785  SHOW_LONGLONG}, \
786  {"api_range_scan_count" NAME_SUFFIX, \
787  (char*) ARRAY_LOCATION[ Ndb::RangeScanCount ], \
788  SHOW_LONGLONG}, \
789  {"api_pruned_scan_count" NAME_SUFFIX, \
790  (char*) ARRAY_LOCATION[ Ndb::PrunedScanCount ], \
791  SHOW_LONGLONG}, \
792  {"api_scan_batch_count" NAME_SUFFIX, \
793  (char*) ARRAY_LOCATION[ Ndb::ScanBatchCount ], \
794  SHOW_LONGLONG}, \
795  {"api_read_row_count" NAME_SUFFIX, \
796  (char*) ARRAY_LOCATION[ Ndb::ReadRowCount ], \
797  SHOW_LONGLONG}, \
798  {"api_trans_local_read_row_count" NAME_SUFFIX, \
799  (char*) ARRAY_LOCATION[ Ndb::TransLocalReadRowCount ], \
800  SHOW_LONGLONG}
801 
802 SHOW_VAR ndb_status_variables_dynamic[]= {
803  {"cluster_node_id", (char*) &g_ndb_status.cluster_node_id, SHOW_LONG},
804  {"config_from_host", (char*) &g_ndb_status.connected_host, SHOW_CHAR_PTR},
805  {"config_from_port", (char*) &g_ndb_status.connected_port, SHOW_LONG},
806 //{"number_of_replicas", (char*) &g_ndb_status.number_of_replicas, SHOW_LONG},
807  {"number_of_data_nodes",(char*) &g_ndb_status.number_of_data_nodes, SHOW_LONG},
808  {"number_of_ready_data_nodes",
809  (char*) &g_ndb_status.number_of_ready_data_nodes, SHOW_LONG},
810  {"connect_count", (char*) &g_ndb_status.connect_count, SHOW_LONG},
811  {"execute_count", (char*) &g_ndb_status.execute_count, SHOW_LONG},
812  {"scan_count", (char*) &g_ndb_status.scan_count, SHOW_LONG},
813  {"pruned_scan_count", (char*) &g_ndb_status.pruned_scan_count, SHOW_LONG},
814  {"schema_locks_count", (char*) &g_ndb_status.schema_locks_count, SHOW_LONG},
815  NDBAPI_COUNTERS("_session", &g_ndb_status.api_client_stats),
816  {"sorted_scan_count", (char*) &g_ndb_status.sorted_scan_count, SHOW_LONG},
817  {"pushed_queries_defined", (char*) &g_ndb_status.pushed_queries_defined,
818  SHOW_LONG},
819  {"pushed_queries_dropped", (char*) &g_ndb_status.pushed_queries_dropped,
820  SHOW_LONG},
821  {"pushed_queries_executed", (char*) &g_ndb_status.pushed_queries_executed,
822  SHOW_LONG},
823  {"pushed_reads", (char*) &g_ndb_status.pushed_reads, SHOW_LONG},
824  {NullS, NullS, SHOW_LONG}
825 };
826 
827 SHOW_VAR ndb_status_conflict_variables[]= {
828  {"fn_max", (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_MAX], SHOW_LONGLONG},
829  {"fn_old", (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_OLD], SHOW_LONGLONG},
830  {"fn_max_del_win", (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_MAX_DEL_WIN], SHOW_LONGLONG},
831  {"fn_epoch", (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_EPOCH], SHOW_LONGLONG},
832  {NullS, NullS, SHOW_LONG}
833 };
834 
835 SHOW_VAR ndb_status_injector_variables[]= {
836  {"api_event_data_count_injector", (char*) &g_event_data_count, SHOW_LONGLONG},
837  {"api_event_nondata_count_injector", (char*) &g_event_nondata_count, SHOW_LONGLONG},
838  {"api_event_bytes_count_injector", (char*) &g_event_bytes_count, SHOW_LONGLONG},
839  {NullS, NullS, SHOW_LONG}
840 };
841 
842 SHOW_VAR ndb_status_slave_variables[]= {
843  NDBAPI_COUNTERS("_slave", &g_slave_api_client_stats),
844  {"slave_max_replicated_epoch", (char*) &g_ndb_slave_state.max_rep_epoch, SHOW_LONGLONG},
845  {NullS, NullS, SHOW_LONG}
846 };
847 
848 SHOW_VAR ndb_status_server_client_stat_variables[]= {
849  NDBAPI_COUNTERS("", &g_server_api_client_stats),
850  {"api_event_data_count",
851  (char*) &g_server_api_client_stats[ Ndb::DataEventsRecvdCount ],
852  SHOW_LONGLONG},
853  {"api_event_nondata_count",
854  (char*) &g_server_api_client_stats[ Ndb::NonDataEventsRecvdCount ],
855  SHOW_LONGLONG},
856  {"api_event_bytes_count",
857  (char*) &g_server_api_client_stats[ Ndb::EventBytesRecvdCount ],
858  SHOW_LONGLONG},
859  {NullS, NullS, SHOW_LONG}
860 };
861 
862 static int show_ndb_server_api_stats(THD *thd, SHOW_VAR *var, char *buff)
863 {
864  /* This function is called when SHOW STATUS / INFO_SCHEMA wants
865  * to see one of our status vars
866  * We use this opportunity to :
867  * 1) Update the globals with current values
868  * 2) Return an array of var definitions, pointing to
869  * the updated globals
870  */
871  ndb_get_connection_stats((Uint64*) &g_server_api_client_stats[0]);
872 
873  var->type= SHOW_ARRAY;
874  var->value= (char*) ndb_status_server_client_stat_variables;
875 
876  return 0;
877 }
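/*
  Illustrative query: because this function returns the array via
  SHOW_ARRAY, the counters surface as server status variables, e.g.

    SHOW GLOBAL STATUS LIKE 'Ndb_api_bytes_sent_count';
*/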
878 
879 SHOW_VAR ndb_status_index_stat_variables[]= {
880  {"cache_query", (char*) &g_ndb_status_index_stat_cache_query, SHOW_LONG},
881  {"cache_clean", (char*) &g_ndb_status_index_stat_cache_clean, SHOW_LONG},
882  {NullS, NullS, SHOW_LONG}
883 };
884 
885 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
886 static int ndbcluster_make_pushed_join(handlerton *, THD*,AQP::Join_plan*);
887 #endif
888 
889 /*
890  Error handling functions
891 */
892 
893 /* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */
894 
895 static int ndb_to_mysql_error(const NdbError *ndberr)
896 {
897  /* read the mysql mapped error code */
898  int error= ndberr->mysql_code;
899 
900  switch (error)
901  {
902  /* errors for which we do not add warnings, just return mapped error code
903  */
904  case HA_ERR_NO_SUCH_TABLE:
905  case HA_ERR_KEY_NOT_FOUND:
906  return error;
907 
908  /* Mapping missing, go with the ndb error code*/
909  case -1:
910  error= ndberr->code;
911  break;
912  /* Mapping exists, go with the mapped code */
913  default:
914  break;
915  }
916 
917  /*
918  If we don't abort directly on warnings push a warning
919  with the internal error information
920  */
921  if (!current_thd->abort_on_warning)
922  {
923  /*
924  Push the NDB error message as warning
 925  - Used to be able to use SHOW WARNINGS to get more info on what the error is
926  - Used by replication to see if the error was temporary
927  */
928  if (ndberr->status == NdbError::TemporaryError)
929  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
930  ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
931  ndberr->code, ndberr->message, "NDB");
932  else
933  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
934  ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
935  ndberr->code, ndberr->message, "NDB");
936  }
937  return error;
938 }
939 
940 #ifdef HAVE_NDB_BINLOG
941 
942 /* Write conflicting row to exceptions table. */
943 static int write_conflict_row(NDB_SHARE *share,
944  NdbTransaction *trans,
945  const uchar *row,
946  NdbError& err)
947 {
948  DBUG_ENTER("write_conflict_row");
949 
950  /* get exceptions table */
951  NDB_CONFLICT_FN_SHARE *cfn_share= share->m_cfn_share;
952  const NDBTAB *ex_tab= cfn_share->m_ex_tab;
953  DBUG_ASSERT(ex_tab != NULL);
954 
955  /* get insert op */
956  NdbOperation *ex_op= trans->getNdbOperation(ex_tab);
957  if (ex_op == NULL)
958  {
959  err= trans->getNdbError();
960  DBUG_RETURN(-1);
961  }
962  if (ex_op->insertTuple() == -1)
963  {
964  err= ex_op->getNdbError();
965  DBUG_RETURN(-1);
966  }
967  {
968  uint32 server_id= (uint32)::server_id;
969  uint32 master_server_id= (uint32) ndb_mi_get_master_server_id();
970  uint64 master_epoch= (uint64) g_ndb_slave_state.current_master_server_epoch;
971  uint32 count= (uint32)++(cfn_share->m_count);
972  if (ex_op->setValue((Uint32)0, (const char *)&(server_id)) ||
973  ex_op->setValue((Uint32)1, (const char *)&(master_server_id)) ||
974  ex_op->setValue((Uint32)2, (const char *)&(master_epoch)) ||
975  ex_op->setValue((Uint32)3, (const char *)&(count)))
976  {
977  err= ex_op->getNdbError();
978  DBUG_RETURN(-1);
979  }
980  }
981  /* copy primary keys */
982  {
983  const int fixed_cols= 4;
984  int nkey= cfn_share->m_pk_cols;
985  int k;
986  for (k= 0; k < nkey; k++)
987  {
988  DBUG_ASSERT(row != NULL);
989  const uchar* data= row + cfn_share->m_offset[k];
990  if (ex_op->setValue((Uint32)(fixed_cols + k), (const char*)data) == -1)
991  {
992  err= ex_op->getNdbError();
993  DBUG_RETURN(-1);
994  }
995  }
996  }
997  DBUG_RETURN(0);
998 }
999 #endif
1000 
1001 #ifdef HAVE_NDB_BINLOG
1002 int
1003 handle_conflict_op_error(Thd_ndb* thd_ndb,
1004  NdbTransaction* trans,
1005  const NdbError& err,
1006  const NdbOperation* op);
1007 
1008 int
1009 handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
1010  const char* tab_name,
1011  const NdbRecord* key_rec,
1012  const uchar* pk_row,
1013  enum_conflicting_op_type op_type,
1014  enum_conflict_cause conflict_cause,
1015  const NdbError& conflict_error,
1016  NdbTransaction* conflict_trans,
1017  NdbError& err);
1018 #endif
1019 
1020 static const Uint32 error_op_after_refresh_op = 920;
1021 
1022 inline int
1023 check_completed_operations_pre_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1024  const NdbOperation *first,
1025  const NdbOperation *last,
1026  uint *ignore_count)
1027 {
1028  uint ignores= 0;
1029  DBUG_ENTER("check_completed_operations_pre_commit");
1030 
1031  if (unlikely(first == 0))
1032  {
1033  assert(last == 0);
1034  DBUG_RETURN(0);
1035  }
1036 
1037  /*
1038  Check that all errors are "accepted" errors
1039  or exceptions to report
1040  */
1041 #ifdef HAVE_NDB_BINLOG
1042  const NdbOperation* lastUserOp = trans->getLastDefinedOperation();
1043 #endif
1044  while (true)
1045  {
1046  const NdbError &err= first->getNdbError();
1047  const bool op_has_conflict_detection = (first->getCustomData() != NULL);
1048  if (!op_has_conflict_detection)
1049  {
1050  /* 'Normal path' - ignore key (not) present, others are errors */
1051  if (err.classification != NdbError::NoError &&
 1052  err.classification != NdbError::ConstraintViolation &&
 1053  err.classification != NdbError::NoDataFound)
 1054  {
1055  /* Non ignored error, report it */
1056  DBUG_PRINT("info", ("err.code == %u", err.code));
1057  DBUG_RETURN(err.code);
1058  }
1059  }
1060 #ifdef HAVE_NDB_BINLOG
1061  else
1062  {
1063  /*
1064  Op with conflict detection, use special error handling method
1065  */
1066 
1067  if (err.classification != NdbError::NoError)
1068  {
1069  int res = handle_conflict_op_error(thd_ndb,
1070  trans,
1071  err,
1072  first);
1073  if (res != 0)
1074  DBUG_RETURN(res);
1075  }
1076  } // if (!op_has_conflict_detection)
1077 #endif
1078  if (err.classification != NdbError::NoError)
1079  ignores++;
1080 
1081  if (first == last)
1082  break;
1083 
1084  first= trans->getNextCompletedOperation(first);
1085  }
1086  if (ignore_count)
1087  *ignore_count= ignores;
1088 #ifdef HAVE_NDB_BINLOG
1089  /*
1090  Conflict detection related error handling above may have defined
1091  new operations on the transaction. If so, execute them now
1092  */
1093  if (trans->getLastDefinedOperation() != lastUserOp)
1094  {
1095  const NdbOperation* last_conflict_op = trans->getLastDefinedOperation();
1096 
1097  if (trans->execute(NdbTransaction::NoCommit,
 1098  NdbOperation::AO_IgnoreError,
 1099  thd_ndb->m_force_send))
1100  {
1101  abort();
1102  //err= trans->getNdbError();
1103  }
1104 
1105  if (trans->getNdbError().code)
1106  {
1107  /* Check the result codes of the operations we added */
1108  const NdbOperation* conflict_op = NULL;
1109  do
1110  {
1111  conflict_op = trans->getNextCompletedOperation(conflict_op);
1112  assert(conflict_op != NULL);
1113  /* We will ignore 920 which represents a refreshOp or other op
1114  * arriving after a refreshOp
1115  */
1116  const NdbError& err = conflict_op->getNdbError();
1117  if ((err.code != 0) &&
1118  (err.code != (int) error_op_after_refresh_op))
1119  {
1120  if (err.status == NdbError::TemporaryError)
1121  {
1122  /* Slave will roll back and retry entire transaction. */
1123  ERR_RETURN(err);
1124  }
1125  else
1126  {
1127  char msg[FN_REFLEN];
1128  my_snprintf(msg, sizeof(msg), "Executing extra operations for "
1129  "conflict handling hit Ndb error %d '%s'",
1130  err.code, err.message);
1131  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_ERROR,
1132  ER_EXCEPTIONS_WRITE_ERROR,
1133  ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
1134  /* Slave will stop replication. */
1135  DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
1136  }
1137  }
1138  } while (conflict_op != last_conflict_op);
1139  }
1140  }
1141 #endif
1142  DBUG_RETURN(0);
1143 }
1144 
1145 inline int
1146 check_completed_operations(Thd_ndb *thd_ndb, NdbTransaction *trans,
1147  const NdbOperation *first,
1148  const NdbOperation *last,
1149  uint *ignore_count)
1150 {
1151  uint ignores= 0;
1152  DBUG_ENTER("check_completed_operations");
1153 
1154  if (unlikely(first == 0))
1155  {
1156  assert(last == 0);
1157  DBUG_RETURN(0);
1158  }
1159 
1160  /*
1161  Check that all errors are "accepted" errors
1162  */
1163  while (true)
1164  {
1165  const NdbError &err= first->getNdbError();
1166  if (err.classification != NdbError::NoError &&
 1167  err.classification != NdbError::ConstraintViolation &&
 1168  err.classification != NdbError::NoDataFound)
 1169  {
1170 #ifdef HAVE_NDB_BINLOG
1171  /* All conflict detection etc should be done before commit */
1172  DBUG_ASSERT((err.code != (int) error_conflict_fn_violation) &&
1173  (err.code != (int) error_op_after_refresh_op));
1174 #endif
1175  DBUG_RETURN(err.code);
1176  }
1177  if (err.classification != NdbError::NoError)
1178  ignores++;
1179 
1180  if (first == last)
1181  break;
1182 
1183  first= trans->getNextCompletedOperation(first);
1184  }
1185  if (ignore_count)
1186  *ignore_count= ignores;
1187  DBUG_RETURN(0);
1188 }
1189 
1190 void
1191 ha_ndbcluster::release_completed_operations(NdbTransaction *trans)
1192 {
1201  trans->releaseCompletedOperations();
1202  trans->releaseCompletedQueries();
1203 }
1204 
1205 int execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1206  bool ignore_no_key,
1207  uint *ignore_count= 0);
1208 inline
1209 int execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1210  bool ignore_no_key,
1211  uint *ignore_count)
1212 {
1213  DBUG_ENTER("execute_no_commit");
1214  ha_ndbcluster::release_completed_operations(trans);
1215  const NdbOperation *first= trans->getFirstDefinedOperation();
1216  const NdbOperation *last= trans->getLastDefinedOperation();
1217  thd_ndb->m_execute_count++;
1218  thd_ndb->m_unsent_bytes= 0;
1219  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1220  if (trans->execute(NdbTransaction::NoCommit,
 1221  NdbOperation::AO_IgnoreError,
 1222  thd_ndb->m_force_send))
1223  {
1224  DBUG_RETURN(-1);
1225  }
1226  if (!ignore_no_key || trans->getNdbError().code == 0)
1227  DBUG_RETURN(trans->getNdbError().code);
1228 
1229  DBUG_RETURN(check_completed_operations_pre_commit(thd_ndb, trans,
1230  first, last,
1231  ignore_count));
1232 }
1233 
1234 int execute_commit(THD* thd, Thd_ndb *thd_ndb, NdbTransaction *trans,
1235  int force_send, int ignore_error, uint *ignore_count= 0);
1236 inline
1237 int execute_commit(THD* thd, Thd_ndb *thd_ndb, NdbTransaction *trans,
1238  int force_send, int ignore_error, uint *ignore_count)
1239 {
1240  DBUG_ENTER("execute_commit");
1242  if (thd_ndb->m_unsent_bytes && !ignore_error)
1243  {
1244  /*
1245  We have unsent bytes and cannot ignore error. Calling execute
1246  with NdbOperation::AO_IgnoreError will result in possible commit
1247  of a transaction although there is an error.
1248  */
 1249  ao= NdbOperation::AbortOnError;
 1250  }
1251  const NdbOperation *first= trans->getFirstDefinedOperation();
1252  const NdbOperation *last= trans->getLastDefinedOperation();
1253  thd_ndb->m_execute_count++;
1254  thd_ndb->m_unsent_bytes= 0;
1255  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1256  if (trans->execute(NdbTransaction::Commit, ao, force_send))
1257  {
1258  if (thd->slave_thread)
1259  g_ndb_slave_state.atTransactionAbort();
1260  DBUG_RETURN(-1);
1261  }
1262  /* Success of some sort */
1263  if (thd->slave_thread)
1264  {
1265  g_ndb_slave_state.atTransactionCommit();
1266  }
1267  if (!ignore_error || trans->getNdbError().code == 0)
1268  DBUG_RETURN(trans->getNdbError().code);
1269  DBUG_RETURN(check_completed_operations(thd_ndb, trans, first, last,
1270  ignore_count));
1271 }
1272 
1273 inline
1274 int execute_no_commit_ie(Thd_ndb *thd_ndb, NdbTransaction *trans)
1275 {
1276  DBUG_ENTER("execute_no_commit_ie");
1277  ha_ndbcluster::release_completed_operations(trans);
1278  int res= trans->execute(NdbTransaction::NoCommit,
 1279  NdbOperation::AO_IgnoreError,
 1280  thd_ndb->m_force_send);
1281  thd_ndb->m_unsent_bytes= 0;
1282  thd_ndb->m_execute_count++;
1283  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1284  DBUG_RETURN(res);
1285 }
1286 
1287 /*
1288  Place holder for ha_ndbcluster thread specific data
1289 */
1290 typedef struct st_thd_ndb_share {
1291  const void *key;
1292  struct Ndb_local_table_statistics stat;
1293 } THD_NDB_SHARE;
1294 static
1295 uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length,
1296  my_bool not_used __attribute__((unused)))
1297 {
1298  *length= sizeof(thd_ndb_share->key);
1299  return (uchar*) &thd_ndb_share->key;
1300 }
1301 
1302 Thd_ndb::Thd_ndb(THD* thd) :
1303  m_thd(thd),
1304  schema_locks_count(0)
1305 {
1306  connection= ndb_get_cluster_connection();
1307  m_connect_count= connection->get_connect_count();
1308  ndb= new Ndb(connection, "");
1309  lock_count= 0;
1310  start_stmt_count= 0;
1311  save_point_count= 0;
1312  count= 0;
1313  trans= NULL;
1314  m_handler= NULL;
1315  m_error= FALSE;
1316  options= 0;
1317  (void) my_hash_init(&open_tables, table_alias_charset, 5, 0, 0,
1318  (my_hash_get_key)thd_ndb_share_get_key, 0, 0);
1319  m_unsent_bytes= 0;
1320  m_execute_count= 0;
1321  m_scan_count= 0;
1322  m_pruned_scan_count= 0;
1323  m_sorted_scan_count= 0;
1324  m_pushed_queries_defined= 0;
1325  m_pushed_queries_dropped= 0;
1326  m_pushed_queries_executed= 0;
1327  m_pushed_reads= 0;
1328  memset(m_transaction_no_hint_count, 0, sizeof(m_transaction_no_hint_count));
1329  memset(m_transaction_hint_count, 0, sizeof(m_transaction_hint_count));
1330  global_schema_lock_trans= NULL;
1331  global_schema_lock_count= 0;
1332  global_schema_lock_error= 0;
1333  init_alloc_root(&m_batch_mem_root, BATCH_FLUSH_SIZE/4, 0);
1334 }
1335 
1336 Thd_ndb::~Thd_ndb()
1337 {
1338  if (opt_ndb_extra_logging > 1)
1339  {
1340  /*
1341  print some stats about the connection at disconnect
1342  */
1343  for (int i= 0; i < MAX_NDB_NODES; i++)
1344  {
1345  if (m_transaction_hint_count[i] > 0 ||
1346  m_transaction_no_hint_count[i] > 0)
1347  {
1348  sql_print_information("tid %u: node[%u] "
1349  "transaction_hint=%u, transaction_no_hint=%u",
1350  (unsigned)current_thd->thread_id, i,
1351  m_transaction_hint_count[i],
1352  m_transaction_no_hint_count[i]);
1353  }
1354  }
1355  }
1356  if (ndb)
1357  {
1358  delete ndb;
1359  ndb= NULL;
1360  }
1361  changed_tables.empty();
1362  my_hash_free(&open_tables);
1363  free_root(&m_batch_mem_root, MYF(0));
1364 }
1365 
1366 
1367 inline
1368 Ndb *ha_ndbcluster::get_ndb(THD *thd)
1369 {
1370  return get_thd_ndb(thd)->ndb;
1371 }
1372 
1373 /*
 1374  * manage uncommitted inserts/deletes during a transaction to keep the record count correct
1375  */
1376 
1377 void ha_ndbcluster::set_rec_per_key()
1378 {
1379  DBUG_ENTER("ha_ndbcluster::set_rec_per_key");
1380  /*
1381  Set up the 'rec_per_key[]' for keys which we have good knowledge
1382  about the distribution. 'rec_per_key[]' is init'ed to '0' by
1383  open_binary_frm(), which is interpreted as 'unknown' by optimizer.
1384  -> Not setting 'rec_per_key[]' will force the optimizer to use
1385  its own heuristic to estimate 'records pr. key'.
1386  */
1387  for (uint i=0 ; i < table_share->keys ; i++)
1388  {
1389  bool is_unique_index= false;
1390  KEY* key_info= table->key_info + i;
1391  switch (get_index_type(i))
1392  {
1393  case UNIQUE_INDEX:
1394  case PRIMARY_KEY_INDEX:
1395  {
1396  // Index is unique when all 'key_parts' are specified,
1397  // else distribution is unknown and not specified here.
1398  is_unique_index= true;
1399  break;
1400  }
1401  case UNIQUE_ORDERED_INDEX:
1402  case PRIMARY_KEY_ORDERED_INDEX:
1403  is_unique_index= true;
1404  // intentional fall thru to logic for ordered index
1405  case ORDERED_INDEX:
1406  // 'Records pr. key' are unknown for non-unique indexes.
1407  // (May change when we get better index statistics.)
1408  {
1409  THD *thd= current_thd;
1410  const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
1411  THDVAR(thd, index_stat_enable);
1412  if (index_stat_enable)
1413  {
1414  int err= ndb_index_stat_set_rpk(i);
1415  if (err != 0 &&
1416  /* no stats is not unexpected error */
1417  err != NdbIndexStat::NoIndexStats &&
1418  /* warning was printed at first error */
1419  err != Ndb_index_stat_error_HAS_ERROR)
1420  {
1421  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1422  ER_CANT_GET_STAT, /* pun? */
1423  "index stats (RPK) for key %s:"
1424  " unexpected error %d",
1425  key_info->name, err);
1426  }
1427  }
1428  // no fallback method...
1429  break;
1430  }
1431  default:
1432  DBUG_ASSERT(false);
1433  }
1434  // set rows per key to 1 for complete key given for unique/primary index
1435  if (is_unique_index)
1436  {
1437  key_info->rec_per_key[key_info->user_defined_key_parts-1]= 1;
1438  }
1439  }
1440  DBUG_VOID_RETURN;
1441 }
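/*
  Illustrative effect of the code above: for a unique index on (a,b), only
  the full-key entry rec_per_key[1] is set to 1 (exactly one row per
  complete key); the prefix entry rec_per_key[0] stays 0, i.e. 'unknown',
  leaving the optimizer to its own heuristics for partial keys.
*/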
1442 
1443 ha_rows ha_ndbcluster::records()
1444 {
1445  DBUG_ENTER("ha_ndbcluster::records");
1446  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1447  ((const NDBTAB *)m_table)->getTableId(),
1448  m_table_info->no_uncommitted_rows_count));
1449 
1450  if (update_stats(table->in_use, 1) == 0)
1451  {
1452  DBUG_RETURN(stats.records);
1453  }
1454  else
1455  {
1456  DBUG_RETURN(HA_POS_ERROR);
1457  }
1458 }
1459 
1460 void ha_ndbcluster::no_uncommitted_rows_execute_failure()
1461 {
1462  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
1463  get_thd_ndb(current_thd)->m_error= TRUE;
1464  DBUG_VOID_RETURN;
1465 }
1466 
1467 void ha_ndbcluster::no_uncommitted_rows_update(int c)
1468 {
1469  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
1470  struct Ndb_local_table_statistics *local_info= m_table_info;
1471  local_info->no_uncommitted_rows_count+= c;
1472  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1473  ((const NDBTAB *)m_table)->getTableId(),
1474  local_info->no_uncommitted_rows_count));
1475  DBUG_VOID_RETURN;
1476 }
1477 
1478 void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
1479 {
1480  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
1481  Thd_ndb *thd_ndb= get_thd_ndb(thd);
1482  thd_ndb->count++;
1483  thd_ndb->m_error= FALSE;
1484  thd_ndb->m_unsent_bytes= 0;
1485  DBUG_VOID_RETURN;
1486 }
1487 
1488 
1489 int ha_ndbcluster::ndb_err(NdbTransaction *trans,
1490  bool have_lock)
1491 {
1492  THD *thd= current_thd;
1493  int res;
1494  NdbError err= trans->getNdbError();
1495  DBUG_ENTER("ndb_err");
1496 
1497  switch (err.classification) {
1498  case NdbError::SchemaError:
1499  {
1500  // TODO perhaps we need to do more here, invalidate also in the cache
1501  m_table->setStatusInvalid();
1502  /* Close other open handlers not used by any thread */
1503  TABLE_LIST table_list;
1504  memset(&table_list, 0, sizeof(table_list));
1505  table_list.db= m_dbname;
1506  table_list.alias= table_list.table_name= m_tabname;
1507  close_cached_tables(thd, &table_list, have_lock, FALSE, FALSE);
1508  break;
1509  }
1510  default:
1511  break;
1512  }
1513  res= ndb_to_mysql_error(&err);
1514  DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d",
1515  err.code, res));
1516  if (res == HA_ERR_FOUND_DUPP_KEY)
1517  {
1518  char *error_data= err.details;
1519  uint dupkey= MAX_KEY;
1520 
1521  for (uint i= 0; i < MAX_KEY; i++)
1522  {
1523  if (m_index[i].type == UNIQUE_INDEX ||
1524  m_index[i].type == UNIQUE_ORDERED_INDEX)
1525  {
1526  const NDBINDEX *unique_index=
1527  (const NDBINDEX *) m_index[i].unique_index;
1528  if (unique_index &&
1529  (char *) unique_index->getObjectId() == error_data)
1530  {
1531  dupkey= i;
1532  break;
1533  }
1534  }
1535  }
1536  if (m_rows_to_insert == 1)
1537  {
1538  /*
1539  We can only distinguish between primary and non-primary
1540  violations here, so we need to return MAX_KEY for non-primary
1541  to signal that key is unknown
1542  */
1543  m_dupkey= err.code == 630 ? table_share->primary_key : dupkey;
1544  }
1545  else
1546  {
1547  /* We are batching inserts, offending key is not available */
1548  m_dupkey= (uint) -1;
1549  }
1550  }
1551  DBUG_RETURN(res);
1552 }
1553 
1554 
1560 bool ha_ndbcluster::get_error_message(int error,
1561  String *buf)
1562 {
1563  DBUG_ENTER("ha_ndbcluster::get_error_message");
1564  DBUG_PRINT("enter", ("error: %d", error));
1565 
1566  Ndb *ndb= check_ndb_in_thd(current_thd);
1567  if (!ndb)
1568  DBUG_RETURN(FALSE);
1569 
1570  const NdbError err= ndb->getNdbError(error);
1571  bool temporary= err.status==NdbError::TemporaryError;
1572  buf->set(err.message, strlen(err.message), &my_charset_bin);
1573  DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
1574  DBUG_RETURN(temporary);
1575 }
1576 
1577 
1578 /*
1579  field_used_length() returns the number of bytes actually used to
1580  store the data of the field. So for a varstring it includes both
1581  length byte(s) and string data, and anything after data_length()
1582  bytes are unused.
1583 */
1584 static
1585 uint32 field_used_length(const Field* field)
1586 {
1587  if (field->type() == MYSQL_TYPE_VARCHAR)
1588  {
1589  const Field_varstring* f = static_cast<const Field_varstring*>(field);
1590  return f->length_bytes + const_cast<Field_varstring*>(f)->data_length();
1591  // ^ no 'data_length() const'
1592  }
1593  return field->pack_length();
1594 }
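/*
  Worked example (illustrative, assuming a single-byte charset): a
  VARCHAR(300) column holding 'abc' stores 2 length bytes + 3 data bytes,
  so field_used_length() returns 5, while pack_length() would report the
  full 302 bytes.
*/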
1595 
1596 
1600 static bool field_type_forces_var_part(enum_field_types type)
1601 {
1602  switch (type) {
1603  case MYSQL_TYPE_VAR_STRING:
1604  case MYSQL_TYPE_VARCHAR:
1605  return TRUE;
1606  case MYSQL_TYPE_TINY_BLOB:
1607  case MYSQL_TYPE_BLOB:
1608  case MYSQL_TYPE_MEDIUM_BLOB:
1609  case MYSQL_TYPE_LONG_BLOB:
1610  case MYSQL_TYPE_GEOMETRY:
1611  return FALSE;
1612  default:
1613  return FALSE;
1614  }
1615 }
1616 
1617 /*
1618  * This is used for every additional row operation, to update the guesstimate
1619  * of pending bytes to send, and to check if it is now time to flush a batch.
1620  */
1621 bool
1622 ha_ndbcluster::add_row_check_if_batch_full_size(Thd_ndb *thd_ndb, uint size)
1623 {
1624  if (thd_ndb->m_unsent_bytes == 0)
1625  free_root(&(thd_ndb->m_batch_mem_root), MY_MARK_BLOCKS_FREE);
1626 
1627  uint unsent= thd_ndb->m_unsent_bytes;
1628  unsent+= size;
1629  thd_ndb->m_unsent_bytes= unsent;
1630  return unsent >= thd_ndb->m_batch_size;
1631 }
1632 
1633 /*
1634  Return a generic buffer that will remain valid until after next execute.
1635 
1636  The memory is freed by the first call to add_row_check_if_batch_full_size()
1637  following any execute() call. The intention is that the memory is associated
1638  with one batch of operations during batched slave updates.
1639 
1640  Note in particular that using get_buffer() / copy_row_to_buffer() separately
 1641  from add_row_check_if_batch_full_size() could make memory usage grow without
1642  limit, and that this sequence:
1643 
1644  execute()
1645  get_buffer() / copy_row_to_buffer()
1646  add_row_check_if_batch_full_size()
1647  ...
1648  execute()
1649 
1650  will free the memory already at add_row_check_if_batch_full_size() time, it
1651  will not remain valid until the second execute().
1652 */
1653 uchar *
1654 ha_ndbcluster::get_buffer(Thd_ndb *thd_ndb, uint size)
1655 {
1656  return (uchar*)alloc_root(&(thd_ndb->m_batch_mem_root), size);
1657 }
1658 
1659 uchar *
1660 ha_ndbcluster::copy_row_to_buffer(Thd_ndb *thd_ndb, const uchar *record)
1661 {
1662  uchar *row= get_buffer(thd_ndb, table->s->reclength);
1663  if (unlikely(!row))
1664  return NULL;
1665  memcpy(row, record, table->s->reclength);
1666  return row;
1667 }
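/*
  Usage sketch (hypothetical ha_ndbcluster member code, assuming a defined
  NdbTransaction* trans): buffer the row so it survives until the flush,
  account for its size, and execute once the batch limit is reached, as the
  lifetime comment above describes.

    uchar *row_copy= copy_row_to_buffer(m_thd_ndb, record);
    if (row_copy == NULL)
      return HA_ERR_OUT_OF_MEM;
    if (add_row_check_if_batch_full_size(m_thd_ndb, table->s->reclength))
    {
      if (execute_no_commit(m_thd_ndb, trans, false) != 0)
        return ndb_err(trans, false);
    }
*/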
1668 
1675 int findBlobError(NdbError& error, NdbBlob* pBlob)
1676 {
1677  error= pBlob->getNdbError();
1678  if (error.code != 0)
1679  return 0;
1680 
1681  const NdbOperation* pOp= pBlob->getNdbOperation();
1682  error= pOp->getNdbError();
1683  if (error.code != 0)
1684  return 0;
1685 
1686  NdbTransaction* pTrans= pOp->getNdbTransaction();
1687  error= pTrans->getNdbError();
1688  if (error.code != 0)
1689  return 0;
1690 
1691  /* No error on any of the objects */
1692  return -1;
1693 }
1694 
1695 
1696 int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
1697 {
1698  ha_ndbcluster *ha= (ha_ndbcluster *)arg;
1699  DBUG_ENTER("g_get_ndb_blobs_value");
1700  DBUG_PRINT("info", ("destination row: %p", ha->m_blob_destination_record));
1701 
1702  if (ha->m_blob_counter == 0) /* Reset total size at start of row */
1703  ha->m_blobs_row_total_size= 0;
1704 
1705  /* Count the total length needed for blob data. */
1706  int isNull;
1707  if (ndb_blob->getNull(isNull) != 0)
1708  ERR_RETURN(ndb_blob->getNdbError());
1709  if (isNull == 0) {
1710  Uint64 len64= 0;
1711  if (ndb_blob->getLength(len64) != 0)
1712  ERR_RETURN(ndb_blob->getNdbError());
1713  /* Align to Uint64. */
1714  ha->m_blobs_row_total_size+= (len64 + 7) & ~((Uint64)7);
1715  if (ha->m_blobs_row_total_size > 0xffffffff)
1716  {
1717  DBUG_ASSERT(FALSE);
1718  DBUG_RETURN(-1);
1719  }
1720  DBUG_PRINT("info", ("Blob number %d needs size %llu, total buffer reqt. now %llu",
1721  ha->m_blob_counter,
1722  len64,
1723  ha->m_blobs_row_total_size));
1724  }
1725  ha->m_blob_counter++;
1726 
1727  /*
1728  Wait until all blobs in this row are active, so we can allocate
1729  and use a common buffer containing all.
1730  */
1731  if (ha->m_blob_counter < ha->m_blob_expected_count_per_row)
1732  DBUG_RETURN(0);
1733 
1734  /* Reset blob counter for next row (scan scenario) */
1735  ha->m_blob_counter= 0;
1736 
1737  /* Re-allocate bigger blob buffer for this row if necessary. */
1738  if (ha->m_blobs_row_total_size > ha->m_blobs_buffer_size)
1739  {
1740  my_free(ha->m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
1741  DBUG_PRINT("info", ("allocate blobs buffer size %u",
1742  (uint32)(ha->m_blobs_row_total_size)));
 1743  /* The Windows compiler complains when my_malloc() is passed a non-size_t,
 1744  * so validate that the Uint64 value maps losslessly to size_t
 1745  */
1746  if(((size_t)ha->m_blobs_row_total_size) != ha->m_blobs_row_total_size)
1747  {
1748  ha->m_blobs_buffer= NULL;
1749  ha->m_blobs_buffer_size= 0;
1750  DBUG_RETURN(-1);
1751  }
1752 
1753  ha->m_blobs_buffer=
1754  (uchar*) my_malloc((size_t) ha->m_blobs_row_total_size, MYF(MY_WME));
1755  if (ha->m_blobs_buffer == NULL)
1756  {
1757  ha->m_blobs_buffer_size= 0;
1758  DBUG_RETURN(-1);
1759  }
1760  ha->m_blobs_buffer_size= ha->m_blobs_row_total_size;
1761  }
1762 
1763  /*
1764  Now read all blob data.
1765  If we know the destination mysqld row, we also set the blob null bit and
1766  pointer/length (if not, it will be done instead in unpack_record()).
1767  */
1768  uint32 offset= 0;
1769  for (uint i= 0; i < ha->table->s->fields; i++)
1770  {
1771  Field *field= ha->table->field[i];
1772  if (! (field->flags & BLOB_FLAG))
1773  continue;
1774  NdbValue value= ha->m_value[i];
1775  if (value.blob == NULL)
1776  {
1777  DBUG_PRINT("info",("[%u] skipped", i));
1778  continue;
1779  }
1780  Field_blob *field_blob= (Field_blob *)field;
1781  NdbBlob *ndb_blob= value.blob;
1782  int isNull;
1783  if (ndb_blob->getNull(isNull) != 0)
1784  ERR_RETURN(ndb_blob->getNdbError());
1785  if (isNull == 0) {
1786  Uint64 len64= 0;
1787  if (ndb_blob->getLength(len64) != 0)
1788  ERR_RETURN(ndb_blob->getNdbError());
1789  DBUG_ASSERT(len64 < 0xffffffff);
1790  uchar *buf= ha->m_blobs_buffer + offset;
1791  uint32 len= (uint32)(ha->m_blobs_buffer_size - offset);
1792  if (ndb_blob->readData(buf, len) != 0)
1793  {
1794  NdbError err;
1795  if (findBlobError(err, ndb_blob) == 0)
1796  {
1797  ERR_RETURN(err);
1798  }
1799  else
1800  {
1801  /* Should always have some error code set */
1802  assert(err.code != 0);
1803  ERR_RETURN(err);
1804  }
1805  }
1806  DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u",
1807  i, offset, (long) buf, len));
1808  DBUG_ASSERT(len == len64);
1809  if (ha->m_blob_destination_record)
1810  {
1811  my_ptrdiff_t ptrdiff=
1812  ha->m_blob_destination_record - ha->table->record[0];
1813  field_blob->move_field_offset(ptrdiff);
1814  field_blob->set_ptr(len, buf);
1815  field_blob->set_notnull();
1816  field_blob->move_field_offset(-ptrdiff);
1817  }
1818  offset+= Uint32((len64 + 7) & ~((Uint64)7));
1819  }
1820  else if (ha->m_blob_destination_record)
1821  {
1822  /* Have to set length even in this case. */
1823  my_ptrdiff_t ptrdiff=
1824  ha->m_blob_destination_record - ha->table->record[0];
1825  uchar *buf= ha->m_blobs_buffer + offset;
1826  field_blob->move_field_offset(ptrdiff);
1827  field_blob->set_ptr((uint32)0, buf);
1828  field_blob->set_null();
1829  field_blob->move_field_offset(-ptrdiff);
1830  DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
1831  }
1832  }
1833 
1834  if (!ha->m_active_cursor)
1835  {
1836  /* Non-scan, Blob reads have been issued
1837  * execute them and then close the Blob
1838  * handles
1839  */
1840  for (uint i= 0; i < ha->table->s->fields; i++)
1841  {
1842  Field *field= ha->table->field[i];
1843  if (! (field->flags & BLOB_FLAG))
1844  continue;
1845  NdbValue value= ha->m_value[i];
1846  if (value.blob == NULL)
1847  {
1848  DBUG_PRINT("info",("[%u] skipped", i));
1849  continue;
1850  }
1851  NdbBlob *ndb_blob= value.blob;
1852 
1853  assert(ndb_blob->getState() == NdbBlob::Active);
1854 
1855  /* Call close() with execPendingBlobOps == true
1856  * For LM_CommittedRead access, this will enqueue
1857  * an unlock operation, which the Blob framework
1858  * code invoking this callback will execute before
1859  * returning control to the caller of execute()
1860  */
1861  if (ndb_blob->close(true) != 0)
1862  {
1863  ERR_RETURN(ndb_blob->getNdbError());
1864  }
1865  }
1866  }
1867 
1868  DBUG_RETURN(0);
1869 }
1870 
1871 /*
1872  Request reading of blob values.
1873 
1874  If dst_record is specified, the blob null bit, pointer, and length will be
1875  set in that record. Otherwise they must be set later by calling
1876  unpack_record().
1877 */
1878 int
1879 ha_ndbcluster::get_blob_values(const NdbOperation *ndb_op, uchar *dst_record,
1880  const MY_BITMAP *bitmap)
1881 {
1882  uint i;
1883  DBUG_ENTER("ha_ndbcluster::get_blob_values");
1884 
1885  m_blob_counter= 0;
1886  m_blob_expected_count_per_row= 0;
1887  m_blob_destination_record= dst_record;
1888  m_blobs_row_total_size= 0;
1889  ndb_op->getNdbTransaction()->
1890  setMaxPendingBlobReadBytes(THDVAR(current_thd, blob_read_batch_bytes));
1891 
1892  for (i= 0; i < table_share->fields; i++)
1893  {
1894  Field *field= table->field[i];
1895  if (!(field->flags & BLOB_FLAG))
1896  continue;
1897 
1898  DBUG_PRINT("info", ("fieldnr=%d", i));
1899  NdbBlob *ndb_blob;
1900  if (bitmap_is_set(bitmap, i))
1901  {
1902  if ((ndb_blob= ndb_op->getBlobHandle(i)) == NULL ||
1903  ndb_blob->setActiveHook(g_get_ndb_blobs_value, this) != 0)
1904  DBUG_RETURN(1);
1905  m_blob_expected_count_per_row++;
1906  }
1907  else
1908  ndb_blob= NULL;
1909 
1910  m_value[i].blob= ndb_blob;
1911  }
1912 
1913  DBUG_RETURN(0);
1914 }
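/*
  A minimal usage sketch (hypothetical caller, not part of this file):
  blob reads are requested on the operation before execute(), and the
  g_get_ndb_blobs_value hook installed above delivers the data per row:

    const NdbOperation *op=
      trans->readTuple(key_rec, (const char*)key, m_ndb_record, (char*)buf, lm);
    if (op == NULL || get_blob_values(op, buf, table->read_set) != 0)
      return 1;                              // blob handle setup failed
    if (trans->execute(NdbTransaction::NoCommit) != 0)
      return ndb_err(trans);                 // hook has filled the blob data
*/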
1915 
1916 int
1917 ha_ndbcluster::set_blob_values(const NdbOperation *ndb_op,
1918  my_ptrdiff_t row_offset, const MY_BITMAP *bitmap,
1919  uint *set_count, bool batch)
1920 {
1921  uint field_no;
1922  uint *blob_index, *blob_index_end;
1923  int res= 0;
1924  DBUG_ENTER("ha_ndbcluster::set_blob_values");
1925 
1926  *set_count= 0;
1927 
1928  if (table_share->blob_fields == 0)
1929  DBUG_RETURN(0);
1930 
1931  ndb_op->getNdbTransaction()->
1932  setMaxPendingBlobWriteBytes(THDVAR(current_thd, blob_write_batch_bytes));
1933  blob_index= table_share->blob_field;
1934  blob_index_end= blob_index + table_share->blob_fields;
1935  do
1936  {
1937  field_no= *blob_index;
1938  /* A NULL bitmap sets all blobs. */
1939  if (bitmap && !bitmap_is_set(bitmap, field_no))
1940  continue;
1941  Field *field= table->field[field_no];
1942 
1943  NdbBlob *ndb_blob= ndb_op->getBlobHandle(field_no);
1944  if (ndb_blob == NULL)
1945  ERR_RETURN(ndb_op->getNdbError());
1946  if (field->is_real_null(row_offset))
1947  {
1948  DBUG_PRINT("info", ("Setting Blob %d to NULL", field_no));
1949  if (ndb_blob->setNull() != 0)
1950  ERR_RETURN(ndb_op->getNdbError());
1951  }
1952  else
1953  {
1954  Field_blob *field_blob= (Field_blob *)field;
1955 
1956  // Get length and pointer to data
1957  const uchar *field_ptr= field->ptr + row_offset;
1958  uint32 blob_len= field_blob->get_length(field_ptr);
1959  uchar* blob_ptr= NULL;
1960  field_blob->get_ptr(&blob_ptr);
1961 
1962  // Looks like NULL ptr signals length 0 blob
1963  if (blob_ptr == NULL) {
1964  DBUG_ASSERT(blob_len == 0);
1965  blob_ptr= (uchar*)"";
1966  }
1967 
1968  DBUG_PRINT("value", ("set blob ptr: 0x%lx len: %u",
1969  (long) blob_ptr, blob_len));
1970  DBUG_DUMP("value", blob_ptr, MIN(blob_len, 26));
1971 
1972  /*
1973  NdbBlob requires the data pointer to remain valid until execute() time.
1974  So when batching, we need to copy the value to a temporary buffer.
1975  */
1976  if (batch && blob_len > 0)
1977  {
1978  uchar *tmp_buf= get_buffer(m_thd_ndb, blob_len);
1979  if (!tmp_buf)
1980  DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1981  memcpy(tmp_buf, blob_ptr, blob_len);
1982  blob_ptr= tmp_buf;
1983  }
1984  res= ndb_blob->setValue((char*)blob_ptr, blob_len);
1985  if (res != 0)
1986  ERR_RETURN(ndb_op->getNdbError());
1987  }
1988 
1989  ++(*set_count);
1990  } while (++blob_index != blob_index_end);
1991 
1992  DBUG_RETURN(res);
1993 }
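/*
  A corresponding write-side sketch (hypothetical caller): blob columns are
  attached to an already-defined insert/update operation before execute();
  with batch == true the values are copied, so the row buffer may be reused:

    uint blob_count= 0;
    if (set_blob_values(op, 0 /* row_offset */, table->write_set,
                        &blob_count, true /* batch */) != 0)
      return 1;          // getBlobHandle()/setValue() failed
*/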
1994 
1995 /*
 1996  This routine is shared with the injector. There is no common blobs buffer,
 1997  so the buffer and length are passed by reference. The injector also
 1998  passes a record pointer diff.
1999  */
2000 int get_ndb_blobs_value(TABLE* table, NdbValue* value_array,
2001  uchar*& buffer, uint& buffer_size,
2002  my_ptrdiff_t ptrdiff)
2003 {
2004  DBUG_ENTER("get_ndb_blobs_value");
2005 
2006  // Field has no field number so cannot use TABLE blob_field
2007  // Loop twice, first only counting total buffer size
2008  for (int loop= 0; loop <= 1; loop++)
2009  {
2010  uint32 offset= 0;
2011  for (uint i= 0; i < table->s->fields; i++)
2012  {
2013  Field *field= table->field[i];
2014  NdbValue value= value_array[i];
2015  if (! (field->flags & BLOB_FLAG))
2016  continue;
2017  if (value.blob == NULL)
2018  {
2019  DBUG_PRINT("info",("[%u] skipped", i));
2020  continue;
2021  }
2022  Field_blob *field_blob= (Field_blob *)field;
2023  NdbBlob *ndb_blob= value.blob;
2024  int isNull;
2025  if (ndb_blob->getNull(isNull) != 0)
2026  ERR_RETURN(ndb_blob->getNdbError());
2027  if (isNull == 0) {
2028  Uint64 len64= 0;
2029  if (ndb_blob->getLength(len64) != 0)
2030  ERR_RETURN(ndb_blob->getNdbError());
2031  // Align to Uint64
2032  uint32 size= Uint32(len64);
2033  if (size % 8 != 0)
2034  size+= 8 - size % 8;
2035  if (loop == 1)
2036  {
2037  uchar *buf= buffer + offset;
2038  uint32 len= 0xffffffff; // Max uint32
2039  if (ndb_blob->readData(buf, len) != 0)
2040  ERR_RETURN(ndb_blob->getNdbError());
2041  DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u [ptrdiff=%d]",
2042  i, offset, (long) buf, len, (int)ptrdiff));
2043  DBUG_ASSERT(len == len64);
2044  // Ugly hack assumes only ptr needs to be changed
2045  field_blob->set_ptr_offset(ptrdiff, len, buf);
2046  }
2047  offset+= size;
2048  }
2049  else if (loop == 1) // undefined or null
2050  {
2051  // have to set length even in this case
2052  uchar *buf= buffer + offset; // or maybe NULL
2053  uint32 len= 0;
2054  field_blob->set_ptr_offset(ptrdiff, len, buf);
2055  DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
2056  }
2057  }
2058  if (loop == 0 && offset > buffer_size)
2059  {
2060  my_free(buffer, MYF(MY_ALLOW_ZERO_PTR));
2061  buffer_size= 0;
2062  DBUG_PRINT("info", ("allocate blobs buffer size %u", offset));
2063  buffer= (uchar*) my_malloc(offset, MYF(MY_WME));
2064  if (buffer == NULL)
2065  {
2066  sql_print_error("ha_ndbcluster::get_ndb_blobs_value: "
2067  "my_malloc(%u) failed", offset);
2068  DBUG_RETURN(-1);
2069  }
2070  buffer_size= offset;
2071  }
2072  }
2073  DBUG_RETURN(0);
2074 }
2075 
2076 
2081 bool ha_ndbcluster::uses_blob_value(const MY_BITMAP *bitmap) const
2082 {
2083  uint *blob_index, *blob_index_end;
2084  if (table_share->blob_fields == 0)
2085  return FALSE;
2086 
2087  blob_index= table_share->blob_field;
2088  blob_index_end= blob_index + table_share->blob_fields;
2089  do
2090  {
2091  if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index))
2092  return TRUE;
2093  } while (++blob_index != blob_index_end);
2094  return FALSE;
2095 }
2096 
2097 void ha_ndbcluster::release_blobs_buffer()
2098 {
 2099  DBUG_ENTER("release_blobs_buffer");
2100  if (m_blobs_buffer_size > 0)
2101  {
2102  DBUG_PRINT("info", ("Deleting blobs buffer, size %llu", m_blobs_buffer_size));
2103  my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
2104  m_blobs_buffer= 0;
2105  m_blobs_row_total_size= 0;
2106  m_blobs_buffer_size= 0;
2107  }
2108  DBUG_VOID_RETURN;
2109 }
2110 
2123 int cmp_frm(const NDBTAB *ndbtab, const void *pack_data,
2124  uint pack_length)
2125 {
2126  DBUG_ENTER("cmp_frm");
2127  /*
2128  Compare FrmData in NDB with frm file from disk.
2129  */
2130  if ((pack_length != ndbtab->getFrmLength()) ||
2131  (memcmp(pack_data, ndbtab->getFrmData(), pack_length)))
2132  DBUG_RETURN(1);
2133  DBUG_RETURN(0);
2134 }
2135 
2136 /*
2137  Does type support a default value?
2138 */
2139 static bool
2140 type_supports_default_value(enum_field_types mysql_type)
2141 {
2142  bool ret = (mysql_type != MYSQL_TYPE_BLOB &&
2143  mysql_type != MYSQL_TYPE_TINY_BLOB &&
2144  mysql_type != MYSQL_TYPE_MEDIUM_BLOB &&
2145  mysql_type != MYSQL_TYPE_LONG_BLOB &&
2146  mysql_type != MYSQL_TYPE_GEOMETRY);
2147 
2148  return ret;
2149 }
2150 
2160 int ha_ndbcluster::check_default_values(const NDBTAB* ndbtab)
2161 {
 2162  /* Debug-only method for checking that table defaults are aligned
 2163  between MySQLD and Ndb
2164  */
2165  bool defaults_aligned= true;
2166 
2167  if (ndbtab->hasDefaultValues())
2168  {
2169  /* Ndb supports native defaults for non-pk columns */
2170  my_bitmap_map *old_map= tmp_use_all_columns(table, table->read_set);
2171 
2172  for (uint f=0; f < table_share->fields; f++)
2173  {
2174  Field* field= table->field[f]; // Use Field struct from MySQLD table rep
2175  const NdbDictionary::Column* ndbCol= ndbtab->getColumn(field->field_index);
2176 
2177  if ((! (field->flags & (PRI_KEY_FLAG |
2178  NO_DEFAULT_VALUE_FLAG))) &&
2179  type_supports_default_value(field->real_type()))
2180  {
2181  /* We expect Ndb to have a native default for this
2182  * column
2183  */
2184  my_ptrdiff_t src_offset= table_share->default_values -
2185  field->table->record[0];
2186 
2187  /* Move field by offset to refer to default value */
2188  field->move_field_offset(src_offset);
2189 
2190  const uchar* ndb_default= (const uchar*) ndbCol->getDefaultValue();
2191 
2192  if (ndb_default == NULL)
2193  /* MySQLD default must also be NULL */
2194  defaults_aligned= field->is_null();
2195  else
2196  {
2197  if (field->type() != MYSQL_TYPE_BIT)
2198  {
2199  defaults_aligned= (0 == field->cmp(ndb_default));
2200  }
2201  else
2202  {
2203  longlong value= (static_cast<Field_bit*>(field))->val_int();
2204  /* Map to NdbApi format - two Uint32s */
2205  Uint32 out[2];
2206  out[0] = 0;
2207  out[1] = 0;
2208  for (int b=0; b < 64; b++)
2209  {
2210  out[b >> 5] |= (value & 1) << (b & 31);
2211 
2212  value= value >> 1;
2213  }
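 /* Worked example: value 0x0000000500000001 packs to out[0]= 0x00000001,
  out[1]= 0x00000005; bit b of value lands in out[b >> 5] at bit (b & 31). */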
2214  Uint32 defaultLen = field_used_length(field);
2215  defaultLen = ((defaultLen + 3) & ~(Uint32)0x7);
2216  defaults_aligned= (0 == memcmp(ndb_default,
2217  out,
2218  defaultLen));
2219  }
2220  }
2221 
2222  field->move_field_offset(-src_offset);
2223 
2224  if (unlikely(!defaults_aligned))
2225  {
2226  DBUG_PRINT("info", ("Default values differ for column %u",
2227  field->field_index));
2228  break;
2229  }
2230  }
2231  else
2232  {
2233  /* We don't expect Ndb to have a native default for this column */
2234  if (unlikely(ndbCol->getDefaultValue() != NULL))
2235  {
2236  /* Didn't expect that */
2237  DBUG_PRINT("info", ("Column %u has native default, but shouldn't."
2238  " Flags=%u, type=%u",
2239  field->field_index, field->flags, field->real_type()));
2240  defaults_aligned= false;
2241  break;
2242  }
2243  }
2244  }
2245  tmp_restore_column_map(table->read_set, old_map);
2246  }
2247 
2248  return (defaults_aligned? 0: -1);
2249 }
2250 
2251 int ha_ndbcluster::get_metadata(THD *thd, const char *path)
2252 {
2253  Ndb *ndb= get_thd_ndb(thd)->ndb;
2254  NDBDICT *dict= ndb->getDictionary();
2255  const NDBTAB *tab;
2256  int error;
2257  DBUG_ENTER("get_metadata");
2258  DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));
2259 
2260  DBUG_ASSERT(m_table == NULL);
2261  DBUG_ASSERT(m_table_info == NULL);
2262 
2263  uchar *data= NULL, *pack_data= NULL;
2264  size_t length, pack_length;
2265 
2266  /*
2267  Compare FrmData in NDB with frm file from disk.
2268  */
2269  error= 0;
2270  if (readfrm(path, &data, &length) ||
2271  packfrm(data, length, &pack_data, &pack_length))
2272  {
2273  my_free(data, MYF(MY_ALLOW_ZERO_PTR));
2274  my_free(pack_data, MYF(MY_ALLOW_ZERO_PTR));
2275  DBUG_RETURN(1);
2276  }
2277 
2278  ndb->setDatabaseName(m_dbname);
2279  Ndb_table_guard ndbtab_g(dict, m_tabname);
2280  if (!(tab= ndbtab_g.get_table()))
2281  ERR_RETURN(dict->getNdbError());
2282 
2283  if (get_ndb_share_state(m_share) != NSS_ALTERED
2284  && cmp_frm(tab, pack_data, pack_length))
2285  {
2286  DBUG_PRINT("error",
2287  ("metadata, pack_length: %lu getFrmLength: %d memcmp: %d",
2288  (ulong) pack_length, tab->getFrmLength(),
2289  memcmp(pack_data, tab->getFrmData(), pack_length)));
2290  DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length);
2291  DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength());
2292  error= HA_ERR_TABLE_DEF_CHANGED;
2293  }
2294  my_free((char*)data, MYF(0));
2295  my_free((char*)pack_data, MYF(0));
2296 
2297  /* Now check that any Ndb native defaults are aligned
2298  with MySQLD defaults
2299  */
2300  DBUG_ASSERT(check_default_values(tab) == 0);
2301 
2302  if (error)
2303  goto err;
2304 
2305  DBUG_PRINT("info", ("fetched table %s", tab->getName()));
2306  m_table= tab;
2307 
2308  if (bitmap_init(&m_bitmap, m_bitmap_buf, table_share->fields, 0))
2309  {
2310  error= HA_ERR_OUT_OF_MEM;
2311  goto err;
2312  }
2313  if (table_share->primary_key == MAX_KEY)
2314  {
2315  /* Hidden primary key. */
2316  if ((error= add_hidden_pk_ndb_record(dict)) != 0)
2317  goto err;
2318  }
2319 
2320  if ((error= add_table_ndb_record(dict)) != 0)
2321  goto err;
2322 
2323  /*
2324  Approx. write size in bytes over transporter
2325  */
2326  m_bytes_per_write= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();
2327 
2328  /* Open indexes */
2329  if ((error= open_indexes(thd, ndb, table, FALSE)) != 0)
2330  goto err;
2331 
2332  /*
2333  Backward compatibility for tables created without tablespace
2334  in .frm => read tablespace setting from engine
2335  */
2336  if (table_share->mysql_version < 50120 &&
2337  !table_share->tablespace /* safety */)
2338  {
2339  Uint32 id;
2340  if (tab->getTablespace(&id))
2341  {
2342  NdbDictionary::Tablespace ts= dict->getTablespace(id);
2343  NdbError ndberr= dict->getNdbError();
2344  if (ndberr.classification == NdbError::NoError)
2345  {
2346  const char *tablespace= ts.getName();
2347  const size_t tablespace_len= strlen(tablespace);
2348  if (tablespace_len != 0)
2349  {
2350  DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
2351  table_share->tablespace= strmake_root(&table_share->mem_root,
2352  tablespace,
2353  tablespace_len);
2354  }
2355  }
2356  }
2357  }
2358 
2359  ndbtab_g.release();
2360 
2361 #ifdef HAVE_NDB_BINLOG
2362  ndbcluster_read_binlog_replication(thd, ndb, m_share, m_table,
2363  ::server_id, table, FALSE);
2364 #endif
2365 
2366  DBUG_RETURN(0);
2367 
2368 err:
2369  ndbtab_g.invalidate();
2370  m_table= NULL;
2371  DBUG_RETURN(error);
2372 }
2373 
2374 static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
2375  const NDBINDEX *index,
2376  KEY *key_info)
2377 {
2378  DBUG_ENTER("fix_unique_index_attr_order");
2379  unsigned sz= index->getNoOfIndexColumns();
2380 
2381  if (data.unique_index_attrid_map)
2382  my_free((char*)data.unique_index_attrid_map, MYF(0));
2383  data.unique_index_attrid_map= (uchar*)my_malloc(sz,MYF(MY_WME));
2384  if (data.unique_index_attrid_map == 0)
2385  {
2386  sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure",
2387  (unsigned int)sz);
2388  DBUG_RETURN(HA_ERR_OUT_OF_MEM);
2389  }
2390 
2391  KEY_PART_INFO* key_part= key_info->key_part;
2392  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2393  DBUG_ASSERT(key_info->user_defined_key_parts == sz);
2394  for (unsigned i= 0; key_part != end; key_part++, i++)
2395  {
2396  const char *field_name= key_part->field->field_name;
2397 #ifndef DBUG_OFF
2398  data.unique_index_attrid_map[i]= 255;
2399 #endif
2400  for (unsigned j= 0; j < sz; j++)
2401  {
2402  const NDBCOL *c= index->getColumn(j);
2403  if (strcmp(field_name, c->getName()) == 0)
2404  {
2405  data.unique_index_attrid_map[i]= j;
2406  break;
2407  }
2408  }
2409  DBUG_ASSERT(data.unique_index_attrid_map[i] != 255);
2410  }
2411  DBUG_RETURN(0);
2412 }
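/*
  Illustrative example (hypothetical column order): if the MySQL key is
  declared on (b, a) but the NDB index stores its columns as (a, b), the
  loop above produces unique_index_attrid_map[0]= 1 and [1]= 0, so key
  values can later be bound to the correct NDB attribute ids.
*/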
2413 
2414 /*
2415  Create all the indexes for a table.
 2416  If creation of any index fails,
 2417  the error is returned immediately.
2418 */
2419 int ha_ndbcluster::create_indexes(THD *thd, Ndb *ndb, TABLE *tab)
2420 {
2421  uint i;
2422  int error= 0;
2423  const char *index_name;
2424  KEY* key_info= tab->key_info;
2425  const char **key_name= tab->s->keynames.type_names;
2426  DBUG_ENTER("ha_ndbcluster::create_indexes");
2427 
2428  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2429  {
2430  index_name= *key_name;
2431  NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2432  error= create_index(thd, index_name, key_info, idx_type, i);
2433  if (error)
2434  {
2435  DBUG_PRINT("error", ("Failed to create index %u", i));
2436  break;
2437  }
2438  }
2439 
2440  DBUG_RETURN(error);
2441 }
2442 
2443 static void ndb_init_index(NDB_INDEX_DATA &data)
2444 {
2445  data.type= UNDEFINED_INDEX;
2446  data.status= UNDEFINED;
2447  data.unique_index= NULL;
2448  data.index= NULL;
2449  data.unique_index_attrid_map= NULL;
2450  data.ndb_record_key= NULL;
2451  data.ndb_unique_record_key= NULL;
2452  data.ndb_unique_record_row= NULL;
2453 }
2454 
2455 static void ndb_clear_index(NDBDICT *dict, NDB_INDEX_DATA &data)
2456 {
2457  if (data.unique_index_attrid_map)
2458  {
2459  my_free((char*)data.unique_index_attrid_map, MYF(0));
2460  }
2461  if (data.ndb_unique_record_key)
2462  dict->releaseRecord(data.ndb_unique_record_key);
2463  if (data.ndb_unique_record_row)
2464  dict->releaseRecord(data.ndb_unique_record_row);
2465  if (data.ndb_record_key)
2466  dict->releaseRecord(data.ndb_record_key);
2467  ndb_init_index(data);
2468 }
2469 
2470 static
2471 void ndb_protect_char(const char* from, char* to, uint to_length, char protect)
2472 {
2473  uint fpos= 0, tpos= 0;
2474 
2475  while(from[fpos] != '\0' && tpos < to_length - 1)
2476  {
2477  if (from[fpos] == protect)
2478  {
2479  int len= 0;
2480  to[tpos++]= '@';
2481  if(tpos < to_length - 5)
2482  {
2483  len= sprintf(to+tpos, "00%u", (uint) protect);
2484  tpos+= len;
2485  }
2486  }
2487  else
2488  {
2489  to[tpos++]= from[fpos];
2490  }
2491  fpos++;
2492  }
2493  to[tpos]= '\0';
2494 }
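/*
  Worked example: with protect= '/', which has ASCII code 47, the name
  "t1/idx" is encoded as "t1@0047idx"; the '@' marker plus the "00%u"
  encoding keeps the protected character out of NDB dictionary names.
*/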
2495 
2496 /*
2497  Associate a direct reference to an index handle
2498  with an index (for faster access)
2499  */
2500 int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
2501  const char *key_name, uint index_no)
2502 {
2503  char index_name[FN_LEN + 1];
2504  int error= 0;
2505 
2506  NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
2507  m_index[index_no].type= idx_type;
2508  DBUG_ENTER("ha_ndbcluster::add_index_handle");
2509  DBUG_PRINT("enter", ("table %s", m_tabname));
2510 
2511  ndb_protect_char(key_name, index_name, sizeof(index_name) - 1, '/');
2512  if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
2513  {
2514  DBUG_PRINT("info", ("Get handle to index %s", index_name));
2515  const NDBINDEX *index;
2516  do
2517  {
2518  index= dict->getIndexGlobal(index_name, *m_table);
2519  if (!index)
2520  ERR_RETURN(dict->getNdbError());
2521  DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d",
2522  (long) index,
2523  index->getObjectId(),
2524  index->getObjectVersion() & 0xFFFFFF,
2525  index->getObjectVersion() >> 24,
2526  index->getObjectStatus()));
 2527  DBUG_ASSERT(index->getObjectStatus() ==
 2528  NdbDictionary::Object::Retrieved);
 2529  break;
2530  } while (1);
2531  m_index[index_no].index= index;
2532  }
2533  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
2534  {
2535  char unique_index_name[FN_LEN + 1];
2536  static const char* unique_suffix= "$unique";
2537  m_has_unique_index= TRUE;
2538  strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
2539  DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
2540  const NDBINDEX *index;
2541  do
2542  {
2543  index= dict->getIndexGlobal(unique_index_name, *m_table);
2544  if (!index)
2545  ERR_RETURN(dict->getNdbError());
2546  DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d",
2547  (long) index,
2548  index->getObjectId(),
2549  index->getObjectVersion() & 0xFFFFFF,
2550  index->getObjectVersion() >> 24,
2551  index->getObjectStatus()));
 2552  DBUG_ASSERT(index->getObjectStatus() ==
 2553  NdbDictionary::Object::Retrieved);
 2554  break;
2555  } while (1);
2556  m_index[index_no].unique_index= index;
2557  error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
2558  }
2559 
2560  if (!error)
2561  error= add_index_ndb_record(dict, key_info, index_no);
2562 
2563  if (!error)
2564  m_index[index_no].status= ACTIVE;
2565 
2566  DBUG_RETURN(error);
2567 }
2568 
2569 /*
2570  We use this function to convert null bit masks, as found in class Field,
2571  to bit numbers, as used in NdbRecord.
2572 */
2573 static uint
2574 null_bit_mask_to_bit_number(uchar bit_mask)
2575 {
2576  switch (bit_mask)
2577  {
2578  case 0x1: return 0;
2579  case 0x2: return 1;
2580  case 0x4: return 2;
2581  case 0x8: return 3;
2582  case 0x10: return 4;
2583  case 0x20: return 5;
2584  case 0x40: return 6;
2585  case 0x80: return 7;
2586  default:
2587  DBUG_ASSERT(false);
2588  return 0;
2589  }
2590 }
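 /* e.g. a Field with null_bit == 0x20 (1 << 5) occupies bit 5 of its
  null byte, so its NdbRecord specification gets nullbit_bit_in_byte= 5. */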
2591 
2592 static void
 2593 ndb_set_record_specification(uint field_no,
 2594  NdbDictionary::RecordSpecification *spec,
 2595  const TABLE *table,
2596  const NdbDictionary::Table *ndb_table)
2597 {
2598  spec->column= ndb_table->getColumn(field_no);
2599  spec->offset= Uint32(table->field[field_no]->ptr - table->record[0]);
2600  if (table->field[field_no]->null_ptr)
2601  {
2602  spec->nullbit_byte_offset=
2603  Uint32(table->field[field_no]->null_ptr - table->record[0]);
2604  spec->nullbit_bit_in_byte=
2605  null_bit_mask_to_bit_number(table->field[field_no]->null_bit);
2606  }
2607  else if (table->field[field_no]->type() == MYSQL_TYPE_BIT)
2608  {
2609  /* We need to store the position of the overflow bits. */
2610  const Field_bit* field_bit= static_cast<Field_bit*>(table->field[field_no]);
2611  spec->nullbit_byte_offset=
2612  Uint32(field_bit->bit_ptr - table->record[0]);
2613  spec->nullbit_bit_in_byte= field_bit->bit_ofs;
2614  }
2615  else
2616  {
2617  spec->nullbit_byte_offset= 0;
2618  spec->nullbit_bit_in_byte= 0;
2619  }
2620 }
2621 
2622 int
2623 ha_ndbcluster::add_table_ndb_record(NDBDICT *dict)
2624 {
2625  DBUG_ENTER("ha_ndbcluster::add_table_ndb_record()");
2626  NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2627  NdbRecord *rec;
2628  uint i;
2629 
2630  for (i= 0; i < table_share->fields; i++)
2631  {
2632  ndb_set_record_specification(i, &spec[i], table, m_table);
2633  }
2634 
2635  rec= dict->createRecord(m_table, spec, i, sizeof(spec[0]),
2636  NdbDictionary::RecMysqldBitfield);
2637  if (! rec)
2638  ERR_RETURN(dict->getNdbError());
2639  m_ndb_record= rec;
2640 
2641  DBUG_RETURN(0);
2642 }
2643 
2644 /* Create NdbRecord for setting hidden primary key from Uint64. */
2645 int
2646 ha_ndbcluster::add_hidden_pk_ndb_record(NDBDICT *dict)
2647 {
2648  DBUG_ENTER("ha_ndbcluster::add_hidden_pk_ndb_record");
 2649  NdbDictionary::RecordSpecification spec[1];
 2650  NdbRecord *rec;
2651 
2652  spec[0].column= m_table->getColumn(table_share->fields);
2653  spec[0].offset= 0;
2654  spec[0].nullbit_byte_offset= 0;
2655  spec[0].nullbit_bit_in_byte= 0;
2656 
2657  rec= dict->createRecord(m_table, spec, 1, sizeof(spec[0]));
2658  if (! rec)
2659  ERR_RETURN(dict->getNdbError());
2660  m_ndb_hidden_key_record= rec;
2661 
2662  DBUG_RETURN(0);
2663 }
2664 
2665 int
2666 ha_ndbcluster::add_index_ndb_record(NDBDICT *dict, KEY *key_info, uint index_no)
2667 {
2668  DBUG_ENTER("ha_ndbcluster::add_index_ndb_record");
2669  NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2670  NdbRecord *rec;
2671 
2672  Uint32 offset= 0;
2673  for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2674  {
2675  KEY_PART_INFO *kp= &key_info->key_part[i];
2676 
2677  spec[i].column= m_table->getColumn(kp->fieldnr - 1);
2678  if (! spec[i].column)
2679  ERR_RETURN(dict->getNdbError());
2680  if (kp->null_bit)
2681  {
2682  /* Nullable column. */
2683  spec[i].offset= offset + 1; // First byte is NULL flag
2684  spec[i].nullbit_byte_offset= offset;
2685  spec[i].nullbit_bit_in_byte= 0;
2686  }
2687  else
2688  {
2689  /* Not nullable column. */
2690  spec[i].offset= offset;
2691  spec[i].nullbit_byte_offset= 0;
2692  spec[i].nullbit_bit_in_byte= 0;
2693  }
2694  offset+= kp->store_length;
2695  }
2696 
2697  if (m_index[index_no].index)
2698  {
2699  /*
2700  Enable MysqldShrinkVarchar flag so that the two-byte length used by
2701  mysqld for short varchar keys is correctly converted into a one-byte
2702  length used by Ndb kernel.
2703  */
2704  rec= dict->createRecord(m_index[index_no].index, m_table,
2705  spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2706  ( NdbDictionary::RecMysqldShrinkVarchar |
2707  NdbDictionary::RecMysqldBitfield ));
2708  if (! rec)
2709  ERR_RETURN(dict->getNdbError());
2710  m_index[index_no].ndb_record_key= rec;
2711  }
2712  else
2713  m_index[index_no].ndb_record_key= NULL;
2714 
2715  if (m_index[index_no].unique_index)
2716  {
2717  rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2718  spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2719  ( NdbDictionary::RecMysqldShrinkVarchar |
2720  NdbDictionary::RecMysqldBitfield ));
2721  if (! rec)
2722  ERR_RETURN(dict->getNdbError());
2723  m_index[index_no].ndb_unique_record_key= rec;
2724  }
2725  else if (index_no == table_share->primary_key)
2726  {
2727  /* The primary key is special, there is no explicit NDB index associated. */
2728  rec= dict->createRecord(m_table,
2729  spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2730  ( NdbDictionary::RecMysqldShrinkVarchar |
2731  NdbDictionary::RecMysqldBitfield ));
2732  if (! rec)
2733  ERR_RETURN(dict->getNdbError());
2734  m_index[index_no].ndb_unique_record_key= rec;
2735  }
2736  else
2737  m_index[index_no].ndb_unique_record_key= NULL;
2738 
2739  /* Now do the same, but this time with offsets from Field, for row access. */
2740  for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2741  {
2742  const KEY_PART_INFO *kp= &key_info->key_part[i];
2743 
2744  spec[i].offset= kp->offset;
2745  if (kp->null_bit)
2746  {
2747  /* Nullable column. */
2748  spec[i].nullbit_byte_offset= kp->null_offset;
2749  spec[i].nullbit_bit_in_byte= null_bit_mask_to_bit_number(kp->null_bit);
2750  }
2751  else
2752  {
2753  /* Not nullable column. */
2754  spec[i].nullbit_byte_offset= 0;
2755  spec[i].nullbit_bit_in_byte= 0;
2756  }
2757  }
2758 
2759  if (m_index[index_no].unique_index)
2760  {
2761  rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2762  spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2763  NdbDictionary::RecMysqldBitfield);
2764  if (! rec)
2765  ERR_RETURN(dict->getNdbError());
2766  m_index[index_no].ndb_unique_record_row= rec;
2767  }
2768  else if (index_no == table_share->primary_key)
2769  {
2770  rec= dict->createRecord(m_table,
2771  spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2772  NdbDictionary::RecMysqldBitfield);
2773  if (! rec)
2774  ERR_RETURN(dict->getNdbError());
2775  m_index[index_no].ndb_unique_record_row= rec;
2776  }
2777  else
2778  m_index[index_no].ndb_unique_record_row= NULL;
2779 
2780  DBUG_RETURN(0);
2781 }
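/*
  To recap the two layouts built above: ndb_record_key and
  ndb_unique_record_key use a packed key format, with offsets accumulated
  from each part's store_length, while ndb_unique_record_row reuses the
  mysqld row format with offsets taken from Field. This lets key values be
  supplied either from a key buffer or directly from a row image.
*/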
2782 
2783 /*
2784  Associate index handles for each index of a table
2785 */
2786 int ha_ndbcluster::open_indexes(THD *thd, Ndb *ndb, TABLE *tab,
2787  bool ignore_error)
2788 {
2789  uint i;
2790  int error= 0;
2791  NDBDICT *dict= ndb->getDictionary();
2792  KEY* key_info= tab->key_info;
2793  const char **key_name= tab->s->keynames.type_names;
2794  DBUG_ENTER("ha_ndbcluster::open_indexes");
2795  m_has_unique_index= FALSE;
2796  btree_keys.clear_all();
2797  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2798  {
2799  if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
2800  {
2801  if (ignore_error)
2802  m_index[i].index= m_index[i].unique_index= NULL;
2803  else
2804  break;
2805  }
2806  m_index[i].null_in_unique_index= FALSE;
2807  if (check_index_fields_not_null(key_info))
2808  m_index[i].null_in_unique_index= TRUE;
2809 
2810  if (error == 0 && test(index_flags(i, 0, 0) & HA_READ_RANGE))
2811  btree_keys.set_bit(i);
2812  }
2813 
2814  if (error && !ignore_error)
2815  {
2816  while (i > 0)
2817  {
2818  i--;
2819  if (m_index[i].index)
2820  {
2821  dict->removeIndexGlobal(*m_index[i].index, 1);
2822  m_index[i].index= NULL;
2823  }
2824  if (m_index[i].unique_index)
2825  {
2826  dict->removeIndexGlobal(*m_index[i].unique_index, 1);
2827  m_index[i].unique_index= NULL;
2828  }
2829  }
2830  }
2831 
2832  DBUG_ASSERT(error == 0 || error == 4243);
2833 
2834  DBUG_RETURN(error);
2835 }
2836 
2837 /*
2838  Renumber indexes in index list by shifting out
2839  indexes that are to be dropped
2840  */
2841 void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
2842 {
2843  uint i;
2844  const char *index_name;
2845  KEY* key_info= tab->key_info;
2846  const char **key_name= tab->s->keynames.type_names;
2847  DBUG_ENTER("ha_ndbcluster::renumber_indexes");
2848 
2849  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2850  {
2851  index_name= *key_name;
2852  NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2853  m_index[i].type= idx_type;
2854  if (m_index[i].status == TO_BE_DROPPED)
2855  {
2856  DBUG_PRINT("info", ("Shifting index %s(%i) out of the list",
2857  index_name, i));
2858  NDB_INDEX_DATA tmp;
2859  uint j= i + 1;
2860  // Shift index out of list
2861  while(j != MAX_KEY && m_index[j].status != UNDEFINED)
2862  {
2863  tmp= m_index[j - 1];
2864  m_index[j - 1]= m_index[j];
2865  m_index[j]= tmp;
2866  j++;
2867  }
2868  }
2869  }
2870 
2871  DBUG_VOID_RETURN;
2872 }
2873 
2874 /*
2875  Drop all indexes that are marked for deletion
2876 */
2877 int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
2878 {
2879  uint i;
2880  int error= 0;
2881  const char *index_name;
2882  KEY* key_info= tab->key_info;
2883  NDBDICT *dict= ndb->getDictionary();
2884  DBUG_ENTER("ha_ndbcluster::drop_indexes");
2885 
2886  for (i= 0; i < tab->s->keys; i++, key_info++)
2887  {
2888  NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2889  m_index[i].type= idx_type;
2890  if (m_index[i].status == TO_BE_DROPPED)
2891  {
2892  const NdbDictionary::Index *index= m_index[i].index;
2893  const NdbDictionary::Index *unique_index= m_index[i].unique_index;
2894 
2895  if (index)
2896  {
2897  index_name= index->getName();
2898  DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));
2899  // Drop ordered index from ndb
2900  error= dict->dropIndexGlobal(*index);
2901  if (!error)
2902  {
2903  dict->removeIndexGlobal(*index, 1);
2904  m_index[i].index= NULL;
2905  }
2906  }
2907  if (!error && unique_index)
2908  {
2909  index_name= unique_index->getName();
2910  DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
2911  // Drop unique index from ndb
2912  error= dict->dropIndexGlobal(*unique_index);
2913  if (!error)
2914  {
2915  dict->removeIndexGlobal(*unique_index, 1);
2916  m_index[i].unique_index= NULL;
2917  }
2918  }
2919  if (error)
2920  DBUG_RETURN(error);
2921  ndb_clear_index(dict, m_index[i]);
2922  continue;
2923  }
2924  }
2925 
2926  DBUG_RETURN(error);
2927 }
2928 
2933 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
2934 {
2935  return get_index_type_from_key(inx, table_share->key_info,
2936  inx == table_share->primary_key);
2937 }
2938 
2939 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
2940  KEY *key_info,
2941  bool primary) const
2942 {
2943  bool is_hash_index= (key_info[inx].algorithm ==
2944  HA_KEY_ALG_HASH);
2945  if (primary)
2946  return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
2947 
2948  return ((key_info[inx].flags & HA_NOSAME) ?
2949  (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
2950  ORDERED_INDEX);
2951 }
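/*
  Summary of the mapping implemented above:
    primary key, HA_KEY_ALG_HASH  -> PRIMARY_KEY_INDEX
    primary key, otherwise        -> PRIMARY_KEY_ORDERED_INDEX
    HA_NOSAME,   HA_KEY_ALG_HASH  -> UNIQUE_INDEX
    HA_NOSAME,   otherwise        -> UNIQUE_ORDERED_INDEX
    non-unique                    -> ORDERED_INDEX
*/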
2952 
2953 bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info)
2954 {
2955  KEY_PART_INFO* key_part= key_info->key_part;
2956  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2957  DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
2958 
2959  for (; key_part != end; key_part++)
2960  {
2961  Field* field= key_part->field;
2962  if (field->maybe_null())
2963  DBUG_RETURN(TRUE);
2964  }
2965 
2966  DBUG_RETURN(FALSE);
2967 }
2968 
2969 void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
2970 {
2971  uint i;
2972 
2973  DBUG_ENTER("release_metadata");
2974  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
2975 
2976  NDBDICT *dict= ndb->getDictionary();
2977  int invalidate_indexes= 0;
2978  if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
2979  {
2980  invalidate_indexes = 1;
2981  }
2982  if (m_table != NULL)
2983  {
2984  if (m_ndb_record != NULL)
2985  {
2986  dict->releaseRecord(m_ndb_record);
2987  m_ndb_record= NULL;
2988  }
2989  if (m_ndb_hidden_key_record != NULL)
2990  {
2991  dict->releaseRecord(m_ndb_hidden_key_record);
2992  m_ndb_hidden_key_record= NULL;
2993  }
 2994  if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
 2995  invalidate_indexes= 1;
2996  dict->removeTableGlobal(*m_table, invalidate_indexes);
2997  }
2998  // TODO investigate
2999  DBUG_ASSERT(m_table_info == NULL);
3000  m_table_info= NULL;
3001 
3002  // Release index list
3003  for (i= 0; i < MAX_KEY; i++)
3004  {
3005  if (m_index[i].unique_index)
3006  {
3007  DBUG_ASSERT(m_table != NULL);
3008  dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
3009  }
3010  if (m_index[i].index)
3011  {
3012  DBUG_ASSERT(m_table != NULL);
3013  dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
3014  }
3015  ndb_clear_index(dict, m_index[i]);
3016  }
3017 
3018  m_table= NULL;
3019  DBUG_VOID_RETURN;
3020 }
3021 
3022 
3023 /*
3024  Map from thr_lock_type to NdbOperation::LockMode
3025 */
3026 static inline
3027 NdbOperation::LockMode get_ndb_lock_mode(enum thr_lock_type type)
3028 {
 3029  if (type >= TL_WRITE_ALLOW_WRITE)
 3030  return NdbOperation::LM_Exclusive;
 3031  if (type == TL_READ_WITH_SHARED_LOCKS)
 3032  return NdbOperation::LM_Read;
 3033  return NdbOperation::LM_CommittedRead;
 3034 }
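/*
  In effect: any lock type that permits writes maps to an exclusive row
  lock, an explicit shared read maps to LM_Read, and everything weaker
  (a plain SELECT) maps to LM_CommittedRead, which returns the last
  committed value without taking a row lock.
*/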
3035 
3036 
3037 static const ulong index_type_flags[]=
3038 {
3039  /* UNDEFINED_INDEX */
3040  0,
3041 
3042  /* PRIMARY_KEY_INDEX */
3043  HA_ONLY_WHOLE_INDEX,
3044 
3045  /* PRIMARY_KEY_ORDERED_INDEX */
3046  /*
3047  Enable HA_KEYREAD_ONLY when "sorted" indexes are supported,
3048  thus ORDER BY clauses can be optimized by reading directly
3049  through the index.
3050  */
3051  // HA_KEYREAD_ONLY |
3052  HA_READ_NEXT |
3053  HA_READ_PREV |
3054  HA_READ_RANGE |
3055  HA_READ_ORDER,
3056 
3057  /* UNIQUE_INDEX */
3058  HA_ONLY_WHOLE_INDEX,
3059 
3060  /* UNIQUE_ORDERED_INDEX */
3061  HA_READ_NEXT |
3062  HA_READ_PREV |
3063  HA_READ_RANGE |
3064  HA_READ_ORDER,
3065 
3066  /* ORDERED_INDEX */
3067  HA_READ_NEXT |
3068  HA_READ_PREV |
3069  HA_READ_RANGE |
3070  HA_READ_ORDER
3071 };
3072 
3073 static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);
3074 
3075 inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
3076 {
3077  DBUG_ASSERT(idx_no < MAX_KEY);
3078  return m_index[idx_no].type;
3079 }
3080 
3081 inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const
3082 {
3083  DBUG_ASSERT(idx_no < MAX_KEY);
3084  return m_index[idx_no].null_in_unique_index;
3085 }
3086 
3087 
3095 inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
3096  bool all_parts) const
3097 {
3098  DBUG_ENTER("ha_ndbcluster::index_flags");
3099  DBUG_PRINT("enter", ("idx_no: %u", idx_no));
3100  DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size);
3101  DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] |
3102  HA_KEY_SCAN_NOT_ROR);
3103 }
3104 
3105 bool
3106 ha_ndbcluster::primary_key_is_clustered()
3107 {
3108 
3109  if (table->s->primary_key == MAX_KEY)
3110  return false;
3111 
3112  /*
3113  NOTE 1: our ordered indexes are not really clustered
 3114  but since accessing data when scanning the index is free
3115  it's a good approximation
3116 
3117  NOTE 2: We really should consider DD attributes here too
3118  (for which there is IO to read data when scanning index)
3119  but that will need to be handled later...
3120  */
3121  const ndb_index_type idx_type =
3122  get_index_type_from_table(table->s->primary_key);
3123  return (idx_type == PRIMARY_KEY_ORDERED_INDEX ||
3124  idx_type == UNIQUE_ORDERED_INDEX ||
3125  idx_type == ORDERED_INDEX);
3126 }
3127 
3128 bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno)
3129 {
3130  KEY* key_info= table->key_info + keyno;
3131  KEY_PART_INFO* key_part= key_info->key_part;
3132  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
3133  uint i;
3134  DBUG_ENTER("check_index_fields_in_write_set");
3135 
3136  for (i= 0; key_part != end; key_part++, i++)
3137  {
3138  Field* field= key_part->field;
3139  if (!bitmap_is_set(table->write_set, field->field_index))
3140  {
3141  DBUG_RETURN(false);
3142  }
3143  }
3144 
3145  DBUG_RETURN(true);
3146 }
3147 
3148 
3153 int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf,
3154  uint32 *part_id)
3155 {
3156  NdbConnection *trans= m_thd_ndb->trans;
3157  int res;
3158  DBUG_ENTER("pk_read");
3159  DBUG_PRINT("enter", ("key_len: %u read_set=%x",
3160  key_len, table->read_set->bitmap[0]));
3161  DBUG_DUMP("key", key, key_len);
3162  DBUG_ASSERT(trans);
3163 
3164  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);
3165 
3166 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
3167  if (check_if_pushable(NdbQueryOperationDef::PrimaryKeyAccess, table->s->primary_key))
3168  {
3169  // Is parent of pushed join
3170  DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
3171  const int error= pk_unique_index_read_key_pushed(table->s->primary_key, key,
3172  (m_user_defined_partitioning ?
3173  part_id : NULL));
3174  if (unlikely(error))
3175  DBUG_RETURN(error);
3176 
3177  DBUG_ASSERT(m_active_query!=NULL);
3178  if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
3179  m_active_query->getNdbError().code)
3180  {
3181  table->status= STATUS_NOT_FOUND;
3182  DBUG_RETURN(ndb_err(trans));
3183  }
3184 
3185  int result= fetch_next_pushed();
3186  if (result == NdbQuery::NextResult_gotRow)
3187  {
3188  DBUG_RETURN(0);
3189  }
3190  else if (result == NdbQuery::NextResult_scanComplete)
3191  {
3192  DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3193  }
3194  else
3195  {
3196  DBUG_RETURN(ndb_err(trans));
3197  }
3198  }
3199  else
3200 #endif
3201  {
3202  if (m_pushed_join_operation == PUSHED_ROOT)
3203  {
3204  m_thd_ndb->m_pushed_queries_dropped++;
3205  }
3206 
3207  const NdbOperation *op;
3208  if (!(op= pk_unique_index_read_key(table->s->primary_key, key, buf, lm,
3209  (m_user_defined_partitioning ?
3210  part_id :
3211  NULL))))
3212  ERR_RETURN(trans->getNdbError());
3213 
3214  if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
3215  op->getNdbError().code)
3216  {
3217  table->status= STATUS_NOT_FOUND;
3218  DBUG_RETURN(ndb_err(trans));
3219  }
3220  table->status= 0;
3221  DBUG_RETURN(0);
3222  }
3223 }
3224 
3229 int ha_ndbcluster::ndb_pk_update_row(THD *thd,
3230  const uchar *old_data, uchar *new_data,
3231  uint32 old_part_id)
3232 {
3233  NdbTransaction *trans= m_thd_ndb->trans;
3234  int error;
3235  const NdbOperation *op;
3236  DBUG_ENTER("ndb_pk_update_row");
3237  DBUG_ASSERT(trans);
3238 
3239  NdbOperation::OperationOptions *poptions = NULL;
 3240  NdbOperation::OperationOptions options;
 3241  options.optionsPresent=0;
3242 
3243  DBUG_PRINT("info", ("primary key update or partition change, "
3244  "doing read+delete+insert"));
3245  // Get all old fields, since we optimize away fields not in query
3246 
3247  const NdbRecord *key_rec;
3248  const uchar *key_row;
3249 
3250  if (m_user_defined_partitioning)
3251  {
3252  options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
3253  options.partitionId=old_part_id;
3254  poptions=&options;
3255  }
3256 
3257  setup_key_ref_for_ndb_record(&key_rec, &key_row, old_data, FALSE);
3258 
3259  if (!bitmap_is_set_all(table->read_set))
3260  {
3261  /*
3262  Need to read rest of columns for later re-insert.
3263 
3264  Use mask only with columns that are not in write_set, not in
3265  read_set, and not part of the primary key.
3266  */
3267 
3268  bitmap_copy(&m_bitmap, table->read_set);
3269  bitmap_union(&m_bitmap, table->write_set);
3270  bitmap_invert(&m_bitmap);
3271  if (!(op= trans->readTuple(key_rec, (const char *)key_row,
3272  m_ndb_record, (char *)new_data,
3273  get_ndb_lock_mode(m_lock.type),
3274  (const unsigned char *)(m_bitmap.bitmap),
 3275  poptions,
 3276  sizeof(NdbOperation::OperationOptions))))
 3277  ERR_RETURN(trans->getNdbError());
3278 
3279  if (table_share->blob_fields > 0)
3280  {
3281  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
3282  error= get_blob_values(op, new_data, &m_bitmap);
3283  dbug_tmp_restore_column_map(table->read_set, old_map);
3284  if (error != 0)
3285  ERR_RETURN(op->getNdbError());
3286  }
3287  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
3288  {
3289  table->status= STATUS_NOT_FOUND;
3290  DBUG_RETURN(ndb_err(trans));
3291  }
3292  }
3293 
3294  // Delete old row
3295  error= ndb_delete_row(old_data, TRUE);
3296  if (error)
3297  {
3298  DBUG_PRINT("info", ("delete failed"));
3299  DBUG_RETURN(error);
3300  }
3301 
3302  // Insert new row
 3303  DBUG_PRINT("info", ("delete succeeded"));
3304  bool batched_update= (m_active_cursor != 0);
3305  /*
3306  If we are updating a primary key with auto_increment
3307  then we need to update the auto_increment counter
3308  */
3309  if (table->found_next_number_field &&
3310  bitmap_is_set(table->write_set,
3311  table->found_next_number_field->field_index) &&
3312  (error= set_auto_inc(thd, table->found_next_number_field)))
3313  {
3314  DBUG_RETURN(error);
3315  }
3316 
3317  /*
3318  We are mapping a MySQLD PK changing update to an NdbApi delete
3319  and insert.
3320  The original PK changing update may not have written new values
3321  to all columns, so the write set may be partial.
3322  We set the write set to be all columns so that all values are
3323  copied from the old row to the new row.
3324  */
3325  my_bitmap_map *old_map=
3326  tmp_use_all_columns(table, table->write_set);
3327  error= ndb_write_row(new_data, TRUE, batched_update);
3328  tmp_restore_column_map(table->write_set, old_map);
3329 
3330  if (error)
3331  {
3332  DBUG_PRINT("info", ("insert failed"));
3333  if (trans->commitStatus() == NdbConnection::Started)
3334  {
3335  if (thd->slave_thread)
3336  g_ndb_slave_state.atTransactionAbort();
3337  m_thd_ndb->m_unsent_bytes= 0;
3338  m_thd_ndb->m_execute_count++;
 3339  DBUG_PRINT("info", ("execute_count: %u", m_thd_ndb->m_execute_count));
 3340  trans->execute(NdbTransaction::Rollback);
3341 #ifdef FIXED_OLD_DATA_TO_ACTUALLY_CONTAIN_GOOD_DATA
3342  int undo_res;
3343  // Undo delete_row(old_data)
3344  undo_res= ndb_write_row((uchar *)old_data, TRUE, batched_update);
3345  if (undo_res)
3346  push_warning(table->in_use,
3347  Sql_condition::WARN_LEVEL_WARN,
3348  undo_res,
3349  "NDB failed undoing delete at primary key update");
3350 #endif
3351  }
3352  DBUG_RETURN(error);
3353  }
3354  DBUG_PRINT("info", ("delete+insert succeeded"));
3355 
3356  DBUG_RETURN(0);
3357 }
3358 
3365 bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
3366  const NdbOperation *first,
3367  const NdbOperation *last,
3368  uint errcode)
3369 {
3370  const NdbOperation *op= first;
3371  DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");
3372 
3373  while(op)
3374  {
3375  NdbError err= op->getNdbError();
3376  if (err.status != NdbError::Success)
3377  {
3378  if (ndb_to_mysql_error(&err) != (int) errcode)
3379  DBUG_RETURN(FALSE);
3380  if (op == last) break;
3381  op= trans->getNextCompletedOperation(op);
3382  }
3383  else
3384  {
 3385  // We found a duplicate
 3386  if (op->getType() == NdbOperation::UniqueIndexAccess)
 3387  {
3388  if (errcode == HA_ERR_KEY_NOT_FOUND)
3389  {
3390  NdbIndexOperation *iop= (NdbIndexOperation *) op;
3391  const NDBINDEX *index= iop->getIndex();
3392  // Find the key_no of the index
3393  for(uint i= 0; i<table->s->keys; i++)
3394  {
3395  if (m_index[i].unique_index == index)
3396  {
3397  m_dupkey= i;
3398  break;
3399  }
3400  }
3401  }
3402  }
3403  else
3404  {
3405  // Must have been primary key access
3406  DBUG_ASSERT(op->getType() == NdbOperation::PrimaryKeyAccess);
3407  if (errcode == HA_ERR_KEY_NOT_FOUND)
3408  m_dupkey= table->s->primary_key;
3409  }
3410  DBUG_RETURN(FALSE);
3411  }
3412  }
3413  DBUG_RETURN(TRUE);
3414 }
3415 
3416 
3420 static
3421 int
3422 check_null_in_record(const KEY* key_info, const uchar *record)
3423 {
3424  KEY_PART_INFO *curr_part, *end_part;
3425  curr_part= key_info->key_part;
3426  end_part= curr_part + key_info->user_defined_key_parts;
3427 
3428  while (curr_part != end_part)
3429  {
3430  if (curr_part->null_bit &&
3431  (record[curr_part->null_offset] & curr_part->null_bit))
3432  return 1;
3433  curr_part++;
3434  }
3435  return 0;
3436  /*
 3437  We could instead pre-compute a bitmask in table_share with one bit for
 3438  every null-bit in the key, and check this just by AND'ing the bitmask
 3439  with the null bitmap in the record (a sketch follows below).
 3440  But it is not clear the optimization is worth it.
3441  */
3442 }
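/*
  A sketch of that alternative (hypothetical; it assumes a per-key
  key_null_mask precomputed at table-open time, which this file does not
  actually maintain):

    static int
    check_null_in_record_masked(const uchar *record,
                                const uchar *key_null_mask, uint null_bytes)
    {
      // A bit set in both the row's null bitmap and the key's mask means
      // some key column is NULL in this row.
      for (uint i= 0; i < null_bytes; i++)
        if (record[i] & key_null_mask[i])
          return 1;
      return 0;
    }
*/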
3443 
3444 /* Empty mask and dummy row, for reading no attributes using NdbRecord. */
 3445 /* Mask is zero-initialized, having static storage duration. */
3446 static unsigned char empty_mask[(NDB_MAX_ATTRIBUTES_IN_TABLE+7)/8];
3447 static char dummy_row[1];
3448 
3454 int ha_ndbcluster::peek_indexed_rows(const uchar *record,
3455  NDB_WRITE_OP write_op)
3456 {
3457  NdbTransaction *trans;
3458  const NdbOperation *op;
3459  const NdbOperation *first, *last;
 3460  NdbOperation::OperationOptions options;
 3461  NdbOperation::OperationOptions *poptions=NULL;
3462  options.optionsPresent = 0;
3463  uint i;
3464  int res, error;
3465  DBUG_ENTER("peek_indexed_rows");
3466  if (unlikely(!(trans= get_transaction(error))))
3467  {
3468  DBUG_RETURN(error);
3469  }
3470  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
3471  first= NULL;
3472  if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY)
3473  {
3474  /*
3475  * Fetch any row with colliding primary key
3476  */
3477  const NdbRecord *key_rec=
3478  m_index[table->s->primary_key].ndb_unique_record_row;
3479 
3480  if (m_user_defined_partitioning)
3481  {
3482  uint32 part_id;
3483  int error;
3484  longlong func_value;
3485  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
3486  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
3487  dbug_tmp_restore_column_map(table->read_set, old_map);
3488  if (error)
3489  {
3490  m_part_info->err_value= func_value;
3491  DBUG_RETURN(error);
3492  }
3493  options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
3494  options.partitionId=part_id;
3495  poptions=&options;
3496  }
3497 
3498  if (!(op= trans->readTuple(key_rec, (const char *)record,
3499  m_ndb_record, dummy_row, lm, empty_mask,
 3500  poptions,
 3501  sizeof(NdbOperation::OperationOptions))))
 3502  ERR_RETURN(trans->getNdbError());
3503 
3504  first= op;
3505  }
3506  /*
3507  * Fetch any rows with colliding unique indexes
3508  */
3509  KEY* key_info;
3510  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
3511  {
3512  if (i != table_share->primary_key &&
3513  key_info->flags & HA_NOSAME &&
3514  bitmap_is_overlapping(table->write_set, m_key_fields[i]))
3515  {
3516  /*
 3517  A unique index is defined on the table and it is being updated.
3518  We cannot look up a NULL field value in a unique index. But since
3519  keys with NULLs are not indexed, such rows cannot conflict anyway, so
3520  we just skip the index in this case.
3521  */
3522  if (check_null_in_record(key_info, record))
3523  {
3524  DBUG_PRINT("info", ("skipping check for key with NULL"));
3525  continue;
3526  }
3527  if (write_op != NDB_INSERT && !check_index_fields_in_write_set(i))
3528  {
3529  DBUG_PRINT("info", ("skipping check for key %u not in write_set", i));
3530  continue;
3531  }
3532 
3533  const NdbOperation *iop;
3534  const NdbRecord *key_rec= m_index[i].ndb_unique_record_row;
3535  if (!(iop= trans->readTuple(key_rec, (const char *)record,
3536  m_ndb_record, dummy_row,
3537  lm, empty_mask)))
3538  ERR_RETURN(trans->getNdbError());
3539 
3540  if (!first)
3541  first= iop;
3542  }
3543  }
3544  last= trans->getLastDefinedOperation();
3545  if (first)
3546  res= execute_no_commit_ie(m_thd_ndb, trans);
3547  else
3548  {
3549  // Table has no keys
3550  table->status= STATUS_NOT_FOUND;
3551  DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3552  }
3553  if (check_all_operations_for_error(trans, first, last,
3554  HA_ERR_KEY_NOT_FOUND))
3555  {
3556  table->status= STATUS_NOT_FOUND;
3557  DBUG_RETURN(ndb_err(trans));
3558  }
3559  else
3560  {
3561  DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
3562  }
3563  DBUG_RETURN(0);
3564 }
3565 
3566 
3571 int ha_ndbcluster::unique_index_read(const uchar *key,
3572  uint key_len, uchar *buf)
3573 {
3574  NdbTransaction *trans= m_thd_ndb->trans;
3575  DBUG_ENTER("ha_ndbcluster::unique_index_read");
3576  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
3577  DBUG_DUMP("key", key, key_len);
3578  DBUG_ASSERT(trans);
3579 
3580  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);
3581 
3582 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
3583  if (check_if_pushable(NdbQueryOperationDef::UniqueIndexAccess, active_index))
3584  {
3585  DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
3586  const int error= pk_unique_index_read_key_pushed(active_index, key, NULL);
3587  if (unlikely(error))
3588  DBUG_RETURN(error);
3589 
3590  DBUG_ASSERT(m_active_query!=NULL);
3591  if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
3592  m_active_query->getNdbError().code)
3593  {
3594  table->status= STATUS_GARBAGE;
3595  DBUG_RETURN(ndb_err(trans));
3596  }
3597 
3598  int result= fetch_next_pushed();
3599  if (result == NdbQuery::NextResult_gotRow)
3600  {
3601  DBUG_RETURN(0);
3602  }
3603  else if (result == NdbQuery::NextResult_scanComplete)
3604  {
3605  DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3606  }
3607  else
3608  {
3609  DBUG_RETURN(ndb_err(trans));
3610  }
3611  }
3612  else
3613 #endif
3614  {
3615  if (m_pushed_join_operation == PUSHED_ROOT)
3616  {
3617  m_thd_ndb->m_pushed_queries_dropped++;
3618  }
3619 
3620  const NdbOperation *op;
3621 
3622  if (!(op= pk_unique_index_read_key(active_index, key, buf, lm, NULL)))
3623  ERR_RETURN(trans->getNdbError());
3624 
3625  if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
3626  op->getNdbError().code)
3627  {
3628  int err= ndb_err(trans);
3629  if(err==HA_ERR_KEY_NOT_FOUND)
3630  table->status= STATUS_NOT_FOUND;
3631  else
3632  table->status= STATUS_GARBAGE;
3633 
3634  DBUG_RETURN(err);
3635  }
3636 
3637  table->status= 0;
3638  DBUG_RETURN(0);
3639  }
3640 }
3641 
3642 int
3643 ha_ndbcluster::scan_handle_lock_tuple(NdbScanOperation *scanOp,
3644  NdbTransaction *trans)
3645 {
3646  DBUG_ENTER("ha_ndbcluster::scan_handle_lock_tuple");
3647  if (m_lock_tuple)
3648  {
3649  /*
 3650  Lock level m_lock.type is either TL_WRITE_ALLOW_WRITE
 3651  (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
 3652  LOCK IN SHARE MODE) and the row was not explicitly unlocked
 3653  with an unlock_row() call
3654  */
3655  const NdbOperation *op;
3656  // Lock row
3657  DBUG_PRINT("info", ("Keeping lock on scanned row"));
3658 
3659  if (!(op= scanOp->lockCurrentTuple(trans, m_ndb_record,
3660  dummy_row, empty_mask)))
3661  {
3662  /* purecov: begin inspected */
3663  m_lock_tuple= FALSE;
3664  ERR_RETURN(trans->getNdbError());
3665  /* purecov: end */
3666  }
3667  m_thd_ndb->m_unsent_bytes+=12;
3668  }
3669  m_lock_tuple= FALSE;
3670  DBUG_RETURN(0);
3671 }
3672 
3673 inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
3674 {
3675  DBUG_ENTER("fetch_next");
3676  int local_check;
3677  int error;
3678  NdbTransaction *trans= m_thd_ndb->trans;
3679 
3680  DBUG_ASSERT(trans);
3681  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
3682  DBUG_RETURN(error);
3683 
3684  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
3685  m_lock.type != TL_READ_WITH_SHARED_LOCKS;
3686  do {
3687  DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
3688  /*
3689  We can only handle one tuple with blobs at a time.
3690  */
3691  if (m_thd_ndb->m_unsent_bytes && m_blobs_pending)
3692  {
3693  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
3694  DBUG_RETURN(ndb_err(trans));
3695  }
3696 
 3697  /* There should be no unexamined completed operations;
 3698  nextResult() on Blobs generates Blob part read ops,
 3699  so we free them here
3700  */
3701  release_completed_operations(trans);
3702 
3703  if ((local_check= cursor->nextResult(&_m_next_row,
3704  contact_ndb,
3705  m_thd_ndb->m_force_send)) == 0)
3706  {
3707  /*
3708  Explicitly lock tuple if "select for update" or
3709  "select lock in share mode"
3710  */
3711  m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
3712  ||
3713  m_lock.type == TL_READ_WITH_SHARED_LOCKS);
3714  DBUG_RETURN(0);
3715  }
3716  else if (local_check == 1 || local_check == 2)
3717  {
3718  // 1: No more records
3719  // 2: No more cached records
3720 
3721  /*
3722  Before fetching more rows and releasing lock(s),
3723  all pending update or delete operations should
3724  be sent to NDB
3725  */
3726  DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
3727  (long) m_thd_ndb->m_unsent_bytes));
3728  if (m_thd_ndb->m_unsent_bytes)
3729  {
3730  if ((error = flush_bulk_insert()) != 0)
3731  DBUG_RETURN(error);
3732  }
3733  contact_ndb= (local_check == 2);
3734  }
3735  else
3736  {
3737  DBUG_RETURN(ndb_err(trans));
3738  }
3739  } while (local_check == 2);
3740 
3741  DBUG_RETURN(1);
3742 }
3743 
3744 int ha_ndbcluster::fetch_next_pushed()
3745 {
3746  DBUG_ENTER("fetch_next_pushed (from pushed operation)");
3747 
3748  DBUG_ASSERT(m_pushed_operation);
3749  NdbQuery::NextResultOutcome result= m_pushed_operation->nextResult(true, m_thd_ndb->m_force_send);
3750 
3756  if (result == NdbQuery::NextResult_gotRow)
3757  {
3758  DBUG_ASSERT(m_next_row!=NULL);
3759  DBUG_PRINT("info", ("One more record found"));
3760  table->status= 0;
3761  unpack_record(table->record[0], m_next_row);
3762 // m_thd_ndb->m_pushed_reads++;
3763 // DBUG_RETURN(0)
3764  }
3765  else if (result == NdbQuery::NextResult_scanComplete)
3766  {
3767  DBUG_ASSERT(m_next_row==NULL);
3768  DBUG_PRINT("info", ("No more records"));
3769  table->status= STATUS_NOT_FOUND;
3770 // m_thd_ndb->m_pushed_reads++;
3771 // DBUG_RETURN(HA_ERR_END_OF_FILE);
3772  }
3773  else
3774  {
3775  DBUG_PRINT("info", ("Error from 'nextResult()'"));
3776  table->status= STATUS_GARBAGE;
3777 // DBUG_ASSERT(false);
3778 // DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3779  }
3780  DBUG_RETURN(result);
3781 }
3782 
3790 int
3791 ha_ndbcluster::index_read_pushed(uchar *buf, const uchar *key,
3792  key_part_map keypart_map)
3793 {
3794  DBUG_ENTER("index_read_pushed");
3795 
 3796  // The handler may have decided not to execute the pushed join that was prepared.
 3797  // In that case we do an unpushed index_read based on 'plain old' NdbOperations.
3798  if (unlikely(!check_is_pushed()))
3799  {
3800  DBUG_RETURN(index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT));
3801  }
3802 
3803  // Might need to re-establish first result row (wrt. its parents which may have been navigated)
3804  NdbQuery::NextResultOutcome result= m_pushed_operation->firstResult();
3805 
3806  // Result from pushed operation will be referred by 'm_next_row' if non-NULL
3807  if (result == NdbQuery::NextResult_gotRow)
3808  {
3809  DBUG_ASSERT(m_next_row!=NULL);
3810  unpack_record(buf, m_next_row);
3811  table->status= 0;
3812  m_thd_ndb->m_pushed_reads++;
3813  }
3814  else
3815  {
3816  DBUG_ASSERT(result!=NdbQuery::NextResult_gotRow);
3817  table->status= STATUS_NOT_FOUND;
3818  DBUG_PRINT("info", ("No record found"));
3819 // m_thd_ndb->m_pushed_reads++;
3820 // DBUG_RETURN(HA_ERR_END_OF_FILE);
3821  }
3822  DBUG_RETURN(0);
3823 }
3824 
3825 
3832 int ha_ndbcluster::index_next_pushed(uchar *buf)
3833 {
3834  DBUG_ENTER("index_next_pushed");
3835 
 3836  // The handler may have decided not to execute the pushed join that was prepared.
 3837  // In that case we do an unpushed index_next based on 'plain old' NdbOperations.
3838  if (unlikely(!check_is_pushed()))
3839  {
3840  DBUG_RETURN(index_next(buf));
3841  }
3842 
3843  DBUG_ASSERT(m_pushed_join_operation>PUSHED_ROOT); // Child of a pushed join
3844  DBUG_ASSERT(m_active_query==NULL);
3845 
3846  int res = fetch_next_pushed();
3847  if (res == NdbQuery::NextResult_gotRow)
3848  {
3849  DBUG_RETURN(0);
3850  }
3851  else if (res == NdbQuery::NextResult_scanComplete)
3852  {
3853  DBUG_RETURN(HA_ERR_END_OF_FILE);
3854  }
3855  else
3856  {
3857  DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3858  }
3859 }
3860 
3861 
3872 inline int ha_ndbcluster::next_result(uchar *buf)
3873 {
3874  int res;
3875  DBUG_ENTER("next_result");
3876 
3877  if (m_active_cursor)
3878  {
3879  if ((res= fetch_next(m_active_cursor)) == 0)
3880  {
3881  DBUG_PRINT("info", ("One more record found"));
3882 
3883  unpack_record(buf, m_next_row);
3884  table->status= 0;
3885  DBUG_RETURN(0);
3886  }
3887  else if (res == 1)
3888  {
3889  // No more records
3890  table->status= STATUS_NOT_FOUND;
3891 
3892  DBUG_PRINT("info", ("No more records"));
3893  DBUG_RETURN(HA_ERR_END_OF_FILE);
3894  }
3895  else
3896  {
3897  DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3898  }
3899  }
3900  else if (m_active_query)
3901  {
3902  res= fetch_next_pushed();
3903  if (res == NdbQuery::NextResult_gotRow)
3904  {
3905  DBUG_RETURN(0);
3906  }
3907  else if (res == NdbQuery::NextResult_scanComplete)
3908  {
3909  DBUG_RETURN(HA_ERR_END_OF_FILE);
3910  }
3911  else
3912  {
3913  DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3914  }
3915  }
3916  else
3917  DBUG_RETURN(HA_ERR_END_OF_FILE);
3918 }
3919 
3924 const NdbOperation *
3925 ha_ndbcluster::pk_unique_index_read_key(uint idx, const uchar *key, uchar *buf,
 3926  NdbOperation::LockMode lm,
 3927  Uint32 *ppartition_id)
3928 {
3929  const NdbOperation *op;
3930  const NdbRecord *key_rec;
 3931  NdbOperation::OperationOptions options;
 3932  NdbOperation::OperationOptions *poptions = NULL;
 3933  options.optionsPresent= 0;
 3934  NdbOperation::GetValueSpec gets[2];
 3935 
3936  DBUG_ASSERT(m_thd_ndb->trans);
3937 
3938  if (idx != MAX_KEY)
3939  key_rec= m_index[idx].ndb_unique_record_key;
3940  else
3941  key_rec= m_ndb_hidden_key_record;
3942 
3943  /* Initialize the null bitmap, setting unused null bits to 1. */
3944  memset(buf, 0xff, table->s->null_bytes);
3945 
3946  if (table_share->primary_key == MAX_KEY)
3947  {
3948  get_hidden_fields_keyop(&options, gets);
3949  poptions= &options;
3950  }
3951 
3952  if (ppartition_id != NULL)
3953  {
3954  assert(m_user_defined_partitioning);
3955  options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
3956  options.partitionId= *ppartition_id;
3957  poptions= &options;
3958  }
3959 
3960  op= m_thd_ndb->trans->readTuple(key_rec, (const char *)key, m_ndb_record,
3961  (char *)buf, lm,
3962  (uchar *)(table->read_set->bitmap), poptions,
3963  sizeof(NdbOperation::OperationOptions));
3964 
3965  if (uses_blob_value(table->read_set) &&
3966  get_blob_values(op, buf, table->read_set) != 0)
3967  return NULL;
3968 
3969  return op;
3970 }
3971 
3972 extern void sql_print_information(const char *format, ...);
3973 
3974 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
3975 static
3976 bool
3977 is_shrinked_varchar(const Field *field)
3978 {
3979  if (field->real_type() == MYSQL_TYPE_VARCHAR)
3980  {
3981  if (((Field_varstring*)field)->length_bytes == 1)
3982  return true;
3983  }
3984 
3985  return false;
3986 }
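
/*
  Editor's note (illustrative, not part of the original source): MySQL
  stores VARCHAR with one length byte when the maximum byte length fits
  in 255, and two length bytes otherwise. So, for example:

    VARCHAR(10)  CHARACTER SET latin1  -> length_bytes == 1  (shrunken)
    VARCHAR(300) CHARACTER SET latin1  -> length_bytes == 2

  NdbQueryParamValue uses this flag below to know how many length-prefix
  bytes precede the actual key value.
*/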
3987 
3988 int
3989 ha_ndbcluster::pk_unique_index_read_key_pushed(uint idx,
3990  const uchar *key,
3991  Uint32 *ppartition_id)
3992 {
3993  DBUG_ENTER("pk_unique_index_read_key_pushed");
3994  NdbOperation::OperationOptions options;
3995  NdbOperation::OperationOptions *poptions = NULL;
3996  options.optionsPresent= 0;
3997  NdbOperation::GetValueSpec gets[2];
3998 
3999  DBUG_ASSERT(m_thd_ndb->trans);
4000  DBUG_ASSERT(idx < MAX_KEY);
4001 
4002  if (m_active_query)
4003  {
4004  m_active_query->close(FALSE);
4005  m_active_query= NULL;
4006  }
4007 
4008  if (table_share->primary_key == MAX_KEY)
4009  {
4010  get_hidden_fields_keyop(&options, gets);
4011  poptions= &options;
4012  }
4013 
4014  if (ppartition_id != NULL)
4015  {
4016  assert(m_user_defined_partitioning);
4017  options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
4018  options.partitionId= *ppartition_id;
4019  poptions= &options;
4020  }
4021 
4022  KEY *key_def= &table->key_info[idx];
4023  KEY_PART_INFO *key_part;
4024 
4025  uint i;
4026  Uint32 offset= 0;
4027  NdbQueryParamValue paramValues[ndb_pushed_join::MAX_KEY_PART];
4028  DBUG_ASSERT(key_def->user_defined_key_parts <= ndb_pushed_join::MAX_KEY_PART);
4029 
4030  uint map[ndb_pushed_join::MAX_KEY_PART];
4031  ndbcluster_build_key_map(m_table, m_index[idx], &table->key_info[idx], map);
4032 
4033  // Bind key values defining root of pushed join
4034  for (i = 0, key_part= key_def->key_part; i < key_def->user_defined_key_parts; i++, key_part++)
4035  {
4036  bool shrinkVarChar= is_shrinked_varchar(key_part->field);
4037 
4038  if (key_part->null_bit) // Column is nullable
4039  {
4040  DBUG_ASSERT(idx != table_share->primary_key); // PK can't be nullable
4041  DBUG_ASSERT(*(key+offset)==0); // Null values not allowed in key
4042  // Value is stored immediately after the NULL indicator byte
4043  paramValues[map[i]]= NdbQueryParamValue(key+offset+1,shrinkVarChar);
4044  }
4045  else // Non-nullable column
4046  {
4047  paramValues[map[i]]= NdbQueryParamValue(key+offset,shrinkVarChar);
4048  }
4049  offset+= key_part->store_length;
4050  }
4051 
4052  const int ret= create_pushed_join(paramValues, key_def->user_defined_key_parts);
4053  DBUG_RETURN(ret);
4054 }
4055 
4056 #endif
4057 
4059 static uint
4060 count_key_columns(const KEY *key_info, const key_range *key)
4061 {
4062  KEY_PART_INFO *first_key_part= key_info->key_part;
4063  KEY_PART_INFO *key_part_end= first_key_part + key_info->user_defined_key_parts;
4064  KEY_PART_INFO *key_part;
4065  uint length= 0;
4066  for(key_part= first_key_part; key_part < key_part_end; key_part++)
4067  {
4068  if (length >= key->length)
4069  break;
4070  length+= key_part->store_length;
4071  }
4072  return key_part - first_key_part;
4073 }
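
/*
  Editor's note, a worked example (assumed key layout, not part of the
  original source): for an index on (a INT, b INT), each non-nullable
  key part has store_length 4. A key_range covering only 'a' has
  key->length == 4, so the loop stops after one part:

    count_key_columns(key_info, kr_len4)  == 1   // prefix on 'a'
    count_key_columns(key_info, kr_len8)  == 2   // full (a, b) key

  The result is the number of leading index columns the range covers.
*/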
4074 
4075 /* Helper method to compute NDB index bounds. Note: does not set range_no. */
4076 /* Stats queries may differ, so the "from" parameter identifies the caller: 0=normal, 1=records-in-range (RIR), 2=records-per-key (RPK). */
4077 void
4078 compute_index_bounds(NdbIndexScanOperation::IndexBound & bound,
4079  const KEY *key_info,
4080  const key_range *start_key, const key_range *end_key,
4081  int from)
4082 {
4083  DBUG_ENTER("ha_ndbcluster::compute_index_bounds");
4084  DBUG_PRINT("info", ("from: %d", from));
4085 
4086 #ifndef DBUG_OFF
4087  DBUG_PRINT("info", ("key parts: %u length: %u",
4088  key_info->user_defined_key_parts, key_info->key_length));
4089  {
4090  for (uint j= 0; j <= 1; j++)
4091  {
4092  const key_range* kr= (j == 0 ? start_key : end_key);
4093  if (kr)
4094  {
4095  DBUG_PRINT("info", ("key range %u: length: %u map: %lx flag: %d",
4096  j, kr->length, kr->keypart_map, kr->flag));
4097  DBUG_DUMP("key", kr->key, kr->length);
4098  }
4099  else
4100  {
4101  DBUG_PRINT("info", ("key range %u: none", j));
4102  }
4103  }
4104  }
4105 #endif
4106 
4107  if (start_key)
4108  {
4109  bound.low_key= (const char*)start_key->key;
4110  bound.low_key_count= count_key_columns(key_info, start_key);
4111  bound.low_inclusive=
4112  start_key->flag != HA_READ_AFTER_KEY &&
4113  start_key->flag != HA_READ_BEFORE_KEY;
4114  }
4115  else
4116  {
4117  bound.low_key= NULL;
4118  bound.low_key_count= 0;
4119  }
4120 
4121  /* RIR query for x >= 1 inexplicably passes HA_READ_KEY_EXACT. */
4122  if (start_key &&
4123  (start_key->flag == HA_READ_KEY_EXACT ||
4124  start_key->flag == HA_READ_PREFIX_LAST) &&
4125  from != 1)
4126  {
4127  bound.high_key= bound.low_key;
4128  bound.high_key_count= bound.low_key_count;
4129  bound.high_inclusive= TRUE;
4130  }
4131  else if (end_key)
4132  {
4133  bound.high_key= (const char*)end_key->key;
4134  bound.high_key_count= count_key_columns(key_info, end_key);
4135  /*
4136  For some reason, 'where b >= 1 and b <= 3' uses HA_READ_AFTER_KEY for
4137  the end_key.
4138  So HA_READ_AFTER_KEY in end_key sets high_inclusive, even though in
4139  start_key it does not set low_inclusive.
4140  */
4141  bound.high_inclusive= end_key->flag != HA_READ_BEFORE_KEY;
4142  if (end_key->flag == HA_READ_KEY_EXACT ||
4143  end_key->flag == HA_READ_PREFIX_LAST)
4144  {
4145  bound.low_key= bound.high_key;
4146  bound.low_key_count= bound.high_key_count;
4147  bound.low_inclusive= TRUE;
4148  }
4149  }
4150  else
4151  {
4152  bound.high_key= NULL;
4153  bound.high_key_count= 0;
4154  }
4155  DBUG_PRINT("info", ("start_flag=%d end_flag=%d"
4156  " lo_keys=%d lo_incl=%d hi_keys=%d hi_incl=%d",
4157  start_key?start_key->flag:0, end_key?end_key->flag:0,
4158  bound.low_key_count,
4159  bound.low_key_count?bound.low_inclusive:0,
4160  bound.high_key_count,
4161  bound.high_key_count?bound.high_inclusive:0));
4162  DBUG_VOID_RETURN;
4163 }
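
/*
  Editor's note, an illustrative mapping (assumed flags, not part of the
  original source): for 'a >= 1 AND a < 5' the server typically passes
  start_key->flag == HA_READ_KEY_OR_NEXT and
  end_key->flag == HA_READ_BEFORE_KEY, which the code above turns into:

    bound.low_key_count  = 1;  bound.low_inclusive  = TRUE;   // a >= 1
    bound.high_key_count = 1;  bound.high_inclusive = FALSE;  // a <  5

  An equality 'a = 3' arrives as HA_READ_KEY_EXACT; the low key is then
  reused as the high key with both ends inclusive (except for
  records-in-range estimates, i.e. from == 1).
*/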
4164 
4169 int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
4170  const key_range *end_key,
4171  bool sorted, bool descending,
4172  uchar* buf, part_id_range *part_spec)
4173 {
4174  NdbTransaction *trans;
4175  NdbIndexScanOperation *op;
4176  int error;
4177 
4178  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
4179  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d read_set=0x%x",
4180  active_index, sorted, descending, table->read_set->bitmap[0]));
4181  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
4182 
4183  // Check that sorted seems to be initialised
4184  DBUG_ASSERT(sorted == 0 || sorted == 1);
4185 
4186  if (unlikely(!(trans= get_transaction(error))))
4187  {
4188  DBUG_RETURN(error);
4189  }
4190 
4191  if ((error= close_scan()))
4192  DBUG_RETURN(error);
4193 
4194  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
4195 
4196  const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
4197  const NdbRecord *row_rec= m_ndb_record;
4198 
4199  NdbIndexScanOperation::IndexBound bound;
4200  NdbIndexScanOperation::IndexBound *pbound = NULL;
4201  if (start_key != NULL || end_key != NULL)
4202  {
4203  /*
4204  Compute bounds info, reversing range boundaries
4205  if descending
4206  */
4207  compute_index_bounds(bound,
4208  table->key_info + active_index,
4209  (descending?
4210  end_key : start_key),
4211  (descending?
4212  start_key : end_key),
4213  0);
4214  bound.range_no = 0;
4215  pbound = &bound;
4216  }
4217 
4218 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
4219  if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index,
4220  sorted))
4221  {
4222  const int error= create_pushed_join();
4223  if (unlikely(error))
4224  DBUG_RETURN(error);
4225 
4226  NdbQuery* const query= m_active_query;
4227  if (sorted && query->getQueryOperation((uint)PUSHED_ROOT)
4228  ->setOrdering(descending ? NdbQueryOptions::ScanOrdering_descending
4229  : NdbQueryOptions::ScanOrdering_ascending))
4230  {
4231  ERR_RETURN(query->getNdbError());
4232  }
4233 
4234  if (pbound && query->setBound(key_rec, pbound)!=0)
4235  ERR_RETURN(query->getNdbError());
4236 
4237  m_thd_ndb->m_scan_count++;
4238 
4239  bool prunable = false;
4240  if (unlikely(query->isPrunable(prunable) != 0))
4241  ERR_RETURN(query->getNdbError());
4242  if (prunable)
4243  m_thd_ndb->m_pruned_scan_count++;
4244 
4245  DBUG_ASSERT(!uses_blob_value(table->read_set)); // Can't have BLOB in pushed joins (yet)
4246  }
4247  else
4248 #endif
4249  {
4250  if (m_pushed_join_operation == PUSHED_ROOT)
4251  {
4252  m_thd_ndb->m_pushed_queries_dropped++;
4253  }
4254 
4255  NdbScanOperation::ScanOptions options;
4256  options.optionsPresent=NdbScanOperation::ScanOptions::SO_SCANFLAGS;
4257  options.scan_flags=0;
4258 
4259  NdbOperation::GetValueSpec gets[2];
4260  if (table_share->primary_key == MAX_KEY)
4261  get_hidden_fields_scan(&options, gets);
4262 
4263  if (lm == NdbOperation::LM_Read)
4264  options.scan_flags|= NdbScanOperation::SF_KeyInfo;
4265  if (sorted)
4266  options.scan_flags|= NdbScanOperation::SF_OrderByFull;
4267  if (descending)
4268  options.scan_flags|= NdbScanOperation::SF_Descending;
4269 
4270  /* Partition pruning */
4271  if (m_use_partition_pruning &&
4272  m_user_defined_partitioning && part_spec != NULL &&
4273  part_spec->start_part == part_spec->end_part)
4274  {
4275  /* Explicitly set partition id when pruning User-defined partitioned scan */
4276  options.partitionId = part_spec->start_part;
4277  options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
4278  }
4279 
4280  NdbInterpretedCode code(m_table);
4281  if (m_cond && m_cond->generate_scan_filter(&code, &options))
4282  ERR_RETURN(code.getNdbError());
4283 
4284  if (!(op= trans->scanIndex(key_rec, row_rec, lm,
4285  (uchar *)(table->read_set->bitmap),
4286  pbound,
4287  &options,
4288  sizeof(NdbScanOperation::ScanOptions))))
4289  ERR_RETURN(trans->getNdbError());
4290 
4291  DBUG_PRINT("info", ("Is scan pruned to 1 partition? : %u", op->getPruned()));
4292  m_thd_ndb->m_scan_count++;
4293  m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);
4294 
4295  if (uses_blob_value(table->read_set) &&
4296  get_blob_values(op, NULL, table->read_set) != 0)
4297  ERR_RETURN(op->getNdbError());
4298 
4299  m_active_cursor= op;
4300  }
4301 
4302  if (sorted)
4303  {
4304  m_thd_ndb->m_sorted_scan_count++;
4305  }
4306 
4307  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
4308  DBUG_RETURN(ndb_err(trans));
4309 
4310  DBUG_RETURN(next_result(buf));
4311 }
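
/*
  Editor's note, recapping the scan setup above (summary only): MySQL
  hands over start/end keys in scan order, while NDB expects the bound
  in ascending key order, hence the start/end swap for descending
  scans. On the unpushed path the scan flags map directly onto the
  request: SF_KeyInfo for LM_Read (key info is needed for later lock
  takeover), SF_OrderByFull when sorted output is required, and
  SF_Descending for reverse order.
*/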
4312 
4313 static
4314 int
4315 guess_scan_flags(NdbOperation::LockMode lm,
4316  const NDBTAB* tab, const MY_BITMAP* readset)
4317 {
4318  int flags= 0;
4319  flags|= (lm == NdbOperation::LM_Read) ? NdbScanOperation::SF_KeyInfo : 0;
4320  if (tab->checkColumns(0, 0) & 2)
4321  {
4322  int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset));
4323 
4324  if (ret & 2)
4325  { // If disk columns...use disk scan
4326  flags |= NdbScanOperation::SF_DiskScan;
4327  }
4328  else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive))
4329  {
4330  // If no mem column is set and exclusive...guess disk scan
4331  flags |= NdbScanOperation::SF_DiskScan;
4332  }
4333  }
4334  return flags;
4335 }
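
/*
  Editor's note on the bitmask above (derived from this function's own
  use of it, not from NDB API documentation): as consumed here,
  checkColumns() sets bit 0x2 when disk-stored columns are among the
  columns asked about, and bit 0x4 when memory-stored columns are. So
  the function requests SF_DiskScan when the read set touches disk
  columns, and also guesses SF_DiskScan for exclusive scans that read
  no memory column.
*/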
4336 
4337 /*
4338  Start full table scan in NDB or unique index scan
4339  */
4340 
4341 int ha_ndbcluster::full_table_scan(const KEY* key_info,
4342  const key_range *start_key,
4343  const key_range *end_key,
4344  uchar *buf)
4345 {
4346  int error;
4347  NdbTransaction *trans= m_thd_ndb->trans;
4348  part_id_range part_spec;
4349  bool use_set_part_id= FALSE;
4350  NdbOperation::GetValueSpec gets[2];
4351 
4352  DBUG_ENTER("full_table_scan");
4353  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
4354 
4355  if (m_use_partition_pruning && m_user_defined_partitioning)
4356  {
4357  DBUG_ASSERT(m_pushed_join_operation != PUSHED_ROOT);
4358  part_spec.start_part= 0;
4359  part_spec.end_part= m_part_info->get_tot_partitions() - 1;
4360  prune_partition_set(table, &part_spec);
4361  DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
4362  part_spec.start_part, part_spec.end_part));
4363  /*
4364  If partition pruning has found no partition in set
4365  we can return HA_ERR_END_OF_FILE
4366  */
4367  if (part_spec.start_part > part_spec.end_part)
4368  {
4369  DBUG_RETURN(HA_ERR_END_OF_FILE);
4370  }
4371 
4372  if (part_spec.start_part == part_spec.end_part)
4373  {
4374  /*
4375  * Only one partition needs to be scanned; any requested sorting
4376  * can be dropped, since the output from a single ordered partition
4377  * index is already sorted.
4378  *
4379  * Note : This table scan pruning currently only occurs for
4380  * UserDefined partitioned tables.
4381  * It could be extended to occur for natively partitioned tables if
4382  * the Partitioning layer can make a key (e.g. start or end key)
4383  * available so that we can determine the correct pruning in the
4384  * NDBAPI layer.
4385  */
4386  use_set_part_id= TRUE;
4387  if (!trans)
4388  if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
4389  error))))
4390  DBUG_RETURN(error);
4391  }
4392  }
4393  if (!trans)
4394  if (unlikely(!(trans= start_transaction(error))))
4395  DBUG_RETURN(error);
4396 
4397  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
4398  NdbScanOperation::ScanOptions options;
4399  options.optionsPresent = (NdbScanOperation::ScanOptions::SO_SCANFLAGS |
4400  NdbScanOperation::ScanOptions::SO_PARALLEL);
4401  options.scan_flags = guess_scan_flags(lm, m_table, table->read_set);
4402  options.parallel= DEFAULT_PARALLELISM;
4403 
4404  if (use_set_part_id) {
4405  assert(m_user_defined_partitioning);
4406  options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
4407  options.partitionId = part_spec.start_part;
4408  }
4409 
4410  if (table_share->primary_key == MAX_KEY)
4411  get_hidden_fields_scan(&options, gets);
4412 
4413 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
4414  if (check_if_pushable(NdbQueryOperationDef::TableScan))
4415  {
4416  const int error= create_pushed_join();
4417  if (unlikely(error))
4418  DBUG_RETURN(error);
4419 
4420  m_thd_ndb->m_scan_count++;
4421  DBUG_ASSERT(!uses_blob_value(table->read_set)); // Can't have BLOB in pushed joins (yet)
4422  }
4423  else
4424 #endif
4425  {
4426  if (m_pushed_join_operation == PUSHED_ROOT)
4427  {
4428  m_thd_ndb->m_pushed_queries_dropped++;
4429  }
4430 
4431  NdbScanOperation *op;
4432  NdbInterpretedCode code(m_table);
4433 
4434  if (!key_info)
4435  {
4436  if (m_cond && m_cond->generate_scan_filter(&code, &options))
4437  ERR_RETURN(code.getNdbError());
4438  }
4439  else
4440  {
4441  /* Unique index scan in NDB (full table scan with scan filter) */
4442  DBUG_PRINT("info", ("Starting unique index scan"));
4443  if (!m_cond)
4444  m_cond= new ha_ndbcluster_cond;
4445 
4446  if (!m_cond)
4447  {
4448  my_errno= HA_ERR_OUT_OF_MEM;
4449  DBUG_RETURN(my_errno);
4450  }
4451  if (m_cond->generate_scan_filter_from_key(&code, &options, key_info, start_key, end_key, buf))
4452  ERR_RETURN(code.getNdbError());
4453  }
4454 
4455  if (!(op= trans->scanTable(m_ndb_record, lm,
4456  (uchar *)(table->read_set->bitmap),
4457  &options, sizeof(NdbScanOperation::ScanOptions))))
4458  ERR_RETURN(trans->getNdbError());
4459 
4460  m_thd_ndb->m_scan_count++;
4461  m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);
4462 
4463  DBUG_ASSERT(m_active_cursor==NULL);
4464  m_active_cursor= op;
4465 
4466  if (uses_blob_value(table->read_set) &&
4467  get_blob_values(op, NULL, table->read_set) != 0)
4468  ERR_RETURN(op->getNdbError());
4469  } // if (check_if_pushable(NdbQueryOperationDef::TableScan))
4470 
4471  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
4472  DBUG_RETURN(ndb_err(trans));
4473  DBUG_PRINT("exit", ("Scan started successfully"));
4474  DBUG_RETURN(next_result(buf));
4475 } // ha_ndbcluster::full_table_scan()
4476 
4477 int
4478 ha_ndbcluster::set_auto_inc(THD *thd, Field *field)
4479 {
4480  DBUG_ENTER("ha_ndbcluster::set_auto_inc");
4481  bool read_bit= bitmap_is_set(table->read_set, field->field_index);
4482  bitmap_set_bit(table->read_set, field->field_index);
4483  Uint64 next_val= (Uint64) field->val_int() + 1;
4484  if (!read_bit)
4485  bitmap_clear_bit(table->read_set, field->field_index);
4486  DBUG_RETURN(set_auto_inc_val(thd, next_val));
4487 }
4488 
4489 inline
4490 int
4491 ha_ndbcluster::set_auto_inc_val(THD *thd, Uint64 value)
4492 {
4493  Ndb *ndb= get_ndb(thd);
4494  DBUG_ENTER("ha_ndbcluster::set_auto_inc_val");
4495 #ifndef DBUG_OFF
4496  char buff[22];
4497  DBUG_PRINT("info",
4498  ("Trying to set next auto increment value to %s",
4499  llstr(value, buff)));
4500 #endif
4501  if (ndb->checkUpdateAutoIncrementValue(m_share->tuple_id_range, value))
4502  {
4503  Ndb_tuple_id_range_guard g(m_share);
4504  if (ndb->setAutoIncrementValue(m_table, g.range, value, TRUE)
4505  == -1)
4506  ERR_RETURN(ndb->getNdbError());
4507  }
4508  DBUG_RETURN(0);
4509 }
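
/*
  Editor's note, a minimal usage sketch (hypothetical caller, not part
  of the original source): after a row supplies an explicit value for an
  auto_increment column, the handler bumps the NDB counter past it so
  later prefetches cannot hand out duplicates:

    // 'field' is table->next_number_field, just written with value N
    if (int err= set_auto_inc(thd, field))
      return err;              // pushes N + 1 into the NDB counter

  checkUpdateAutoIncrementValue() is consulted first, so the dictionary
  round trip is only attempted when 'value' actually moves the cached
  range forward.
*/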
4510 
4511 Uint32
4512 ha_ndbcluster::setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])
4513 {
4514  Uint32 num_gets= 0;
4515  /*
4516  We need to read the hidden primary key, and possibly the FRAGMENT
4517  pseudo-column.
4518  */
4519  gets[num_gets].column= get_hidden_key_column();
4520  gets[num_gets].appStorage= &m_ref;
4521  num_gets++;
4522  if (m_user_defined_partitioning)
4523  {
4524  /* Need to read partition id to support ORDER BY columns. */
4525  gets[num_gets].column= NdbDictionary::Column::FRAGMENT;
4526  gets[num_gets].appStorage= &m_part_id;
4527  num_gets++;
4528  }
4529  return num_gets;
4530 }
4531 
4532 void
4533 ha_ndbcluster::get_hidden_fields_keyop(NdbOperation::OperationOptions *options,
4534  NdbOperation::GetValueSpec gets[2])
4535 {
4536  Uint32 num_gets= setup_get_hidden_fields(gets);
4537  options->optionsPresent|= NdbOperation::OperationOptions::OO_GETVALUE;
4538  options->extraGetValues= gets;
4539  options->numExtraGetValues= num_gets;
4540 }
4541 
4542 void
4543 ha_ndbcluster::get_hidden_fields_scan(NdbScanOperation::ScanOptions *options,
4544  NdbOperation::GetValueSpec gets[2])
4545 {
4546  Uint32 num_gets= setup_get_hidden_fields(gets);
4547  options->optionsPresent|= NdbScanOperation::ScanOptions::SO_GETVALUE;
4548  options->extraGetValues= gets;
4549  options->numExtraGetValues= num_gets;
4550 }
4551 
4552 inline void
4553 ha_ndbcluster::eventSetAnyValue(THD *thd,
4554  NdbOperation::OperationOptions *options) const
4555 {
4556  options->anyValue= 0;
4557  if (unlikely(m_slow_path))
4558  {
4559  /*
4560  Ignore TNTO_NO_LOGGING for slave thd. It is used to indicate
4561  log-slave-updates option. This is instead handled in the
4562  injector thread, by looking explicitly at the
4563  opt_log_slave_updates flag.
4564  */
4565  Thd_ndb *thd_ndb= get_thd_ndb(thd);
4566  if (thd->slave_thread)
4567  {
4568  /*
4569  Slave-thread, we are applying a replicated event.
4570  We set the server_id to the value received from the log which
4571  may be a composite of server_id and other data according
4572  to the server_id_bits option.
4573  In future it may be useful to support *not* mapping composite
4574  AnyValues to/from Binlogged server-ids
4575  */
4576  options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4577  options->anyValue = thd_unmasked_server_id(thd);
4578  }
4579  else if (thd_ndb->trans_options & TNTO_NO_LOGGING)
4580  {
4581  options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4582  ndbcluster_anyvalue_set_nologging(options->anyValue);
4583  }
4584  }
4585 #ifndef DBUG_OFF
4586  /*
4587  MySQLD will set the user-portion of AnyValue (if any) to all 1s
4588  This tests code filtering ServerIds on the value of server-id-bits.
4589  */
4590  const char* p = getenv("NDB_TEST_ANYVALUE_USERDATA");
4591  if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
4592  {
4593  options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4594  dbug_ndbcluster_anyvalue_set_userbits(options->anyValue);
4595  }
4596 #endif
4597 }
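
/*
  Editor's note, summary of the AnyValue policy above (not part of the
  original source): on the slow path, slave threads tag each operation
  with the originating server id (possibly a composite under
  server_id_bits) so the binlog injector can filter or forward it,
  while TNTO_NO_LOGGING instead marks the operation as
  not-to-be-logged. On the normal path anyValue stays 0 and no option
  flag is set.
*/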
4598 
4599 #ifdef HAVE_NDB_BINLOG
4600 
4610 int
4611 ha_ndbcluster::prepare_conflict_detection(enum_conflicting_op_type op_type,
4612  const NdbRecord* key_rec,
4613  const uchar* old_data,
4614  const uchar* new_data,
4615  NdbInterpretedCode* code,
4616  NdbOperation::OperationOptions* options)
4617 {
4618  DBUG_ENTER("prepare_conflict_detection");
4619 
4620  int res = 0;
4621  const st_conflict_fn_def* conflict_fn = m_share->m_cfn_share->m_conflict_fn;
4622  assert( conflict_fn != NULL );
4623 
4624 
4625  /*
4626  Prepare interpreted code for operation (update + delete only) according
4627  to algorithm used
4628  */
4629  if (op_type != WRITE_ROW)
4630  {
4631  res = conflict_fn->prep_func(m_share->m_cfn_share,
4632  op_type,
4633  old_data,
4634  new_data,
4635  table->write_set,
4636  code);
4637 
4638  if (!res)
4639  {
4640  /* Attach conflict detecting filter program to operation */
4641  options->optionsPresent|=NdbOperation::OperationOptions::OO_INTERPRETED;
4642  options->interpretedCode= code;
4643  }
4644  } // if (op_type != WRITE_ROW)
4645 
4646  g_ndb_slave_state.current_conflict_defined_op_count++;
4647 
4648  /* Now save data for potential insert to exceptions table... */
4649  const uchar* row_to_save = (op_type == DELETE_ROW)? old_data : new_data;
4650  Ndb_exceptions_data ex_data;
4651  ex_data.share= m_share;
4652  ex_data.key_rec= key_rec;
4653  ex_data.op_type= op_type;
4654  /*
4655  We need to save the row data for possible conflict resolution after
4656  execute().
4657  */
4658  ex_data.row= copy_row_to_buffer(m_thd_ndb, row_to_save);
4659  uchar* ex_data_buffer= get_buffer(m_thd_ndb, sizeof(ex_data));
4660  if (ex_data.row == NULL || ex_data_buffer == NULL)
4661  {
4662  DBUG_RETURN(HA_ERR_OUT_OF_MEM);
4663  }
4664  memcpy(ex_data_buffer, &ex_data, sizeof(ex_data));
4665 
4666  /* Store ptr to exceptions data in operation 'customdata' ptr */
4667  options->optionsPresent|= NdbOperation::OperationOptions::OO_CUSTOMDATA;
4668  options->customData= (void*)ex_data_buffer;
4669 
4670  DBUG_RETURN(0);
4671 }
4672 
4686 int
4687 handle_conflict_op_error(Thd_ndb* thd_ndb,
4688  NdbTransaction* trans,
4689  const NdbError& err,
4690  const NdbOperation* op)
4691 {
4692  DBUG_ENTER("handle_conflict_op_error");
4693  DBUG_PRINT("info", ("ndb error: %d", err.code));
4694 
4695  if ((err.code == (int) error_conflict_fn_violation) ||
4696  (err.code == (int) error_op_after_refresh_op) ||
4697  (err.classification == NdbError::ConstraintViolation) ||
4698  (err.classification == NdbError::NoDataFound))
4699  {
4700  DBUG_PRINT("info",
4701  ("err.code %s (int) error_conflict_fn_violation, "
4702  "err.classification %s",
4703  err.code == (int) error_conflict_fn_violation ? "==" : "!=",
4704  err.classification
4705  == NdbError::ConstraintViolation
4706  ? "== NdbError::ConstraintViolation"
4707  : err.classification == NdbError::NoDataFound
4708  ? "== NdbError::NoDataFound" : "!=")));
4709 
4710  enum_conflict_cause conflict_cause;
4711 
4712  if ((err.code == (int) error_conflict_fn_violation) ||
4713  (err.code == (int) error_op_after_refresh_op))
4714  {
4715  conflict_cause= ROW_IN_CONFLICT;
4716  }
4717  else if (err.classification == NdbError::ConstraintViolation)
4718  {
4719  conflict_cause= ROW_ALREADY_EXISTS;
4720  }
4721  else
4722  {
4723  assert(err.classification == NdbError::NoDataFound);
4724  conflict_cause= ROW_DOES_NOT_EXIST;
4725  }
4726 
4727  const void* buffer=op->getCustomData();
4728  assert(buffer);
4729  Ndb_exceptions_data ex_data;
4730  memcpy(&ex_data, buffer, sizeof(ex_data));
4731  NDB_SHARE *share= ex_data.share;
4732  const NdbRecord* key_rec= ex_data.key_rec;
4733  const uchar* row= ex_data.row;
4734  enum_conflicting_op_type op_type = ex_data.op_type;
4735  DBUG_ASSERT(share != NULL && row != NULL);
4736 
4737  NDB_CONFLICT_FN_SHARE* cfn_share= share->m_cfn_share;
4738  if (cfn_share)
4739  {
4740  enum_conflict_fn_type cft = cfn_share->m_conflict_fn->type;
4741  bool haveExTable = cfn_share->m_ex_tab != NULL;
4742 
4743  g_ndb_slave_state.current_violation_count[cft]++;
4744 
4745  {
4746  NdbError handle_error;
4747  if (handle_row_conflict(cfn_share,
4748  share->table_name,
4749  key_rec,
4750  row,
4751  op_type,
4752  conflict_cause,
4753  err,
4754  trans,
4755  handle_error))
4756  {
4757  /* Error with handling of row conflict */
4758  char msg[FN_REFLEN];
4759  my_snprintf(msg, sizeof(msg), "Row conflict handling "
4760  "on table %s hit Ndb error %d '%s'",
4761  share->table_name,
4762  handle_error.code,
4763  handle_error.message);
4764 
4765  if (handle_error.status == NdbError::TemporaryError)
4766  {
4767  /* Slave will roll back and retry entire transaction. */
4768  ERR_RETURN(handle_error);
4769  }
4770  else
4771  {
4772  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4773  ER_EXCEPTIONS_WRITE_ERROR,
4774  ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
4775  /* Slave will stop replication. */
4776  DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
4777  }
4778  }
4779  }
4780 
4781 
4782  if (haveExTable)
4783  {
4784  NdbError ex_err;
4785  if (write_conflict_row(share, trans, row, ex_err))
4786  {
4787  char msg[FN_REFLEN];
4788  my_snprintf(msg, sizeof(msg), "table %s NDB error %d '%s'",
4789  cfn_share->m_ex_tab->getName(),
4790  ex_err.code, ex_err.message);
4791 
4792  NdbDictionary::Dictionary* dict= thd_ndb->ndb->getDictionary();
4793 
4794  if (ex_err.classification == NdbError::SchemaError)
4795  {
4796  dict->removeTableGlobal(*(cfn_share->m_ex_tab), false);
4797  cfn_share->m_ex_tab= NULL;
4798  }
4799  else if (ex_err.status == NdbError::TemporaryError)
4800  {
4801  /* Slave will roll back and retry entire transaction. */
4802  ERR_RETURN(ex_err);
4803  }
4804  else
4805  {
4806  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4807  ER_EXCEPTIONS_WRITE_ERROR,
4808  ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
4809  /* Slave will stop replication. */
4810  DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
4811  }
4812  }
4813  } // if (haveExTable)
4814 
4815  DBUG_RETURN(0);
4816  }
4817  else
4818  {
4819  DBUG_PRINT("info", ("missing cfn_share"));
4820  DBUG_RETURN(0); // TODO : Correct?
4821  }
4822  }
4823  else
4824  {
4825  /* Non conflict related error */
4826  DBUG_PRINT("info", ("err.code == %u", err.code));
4827  DBUG_RETURN(err.code);
4828  }
4829 
4830  DBUG_RETURN(0); // Reachable?
4831 }
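
/*
  Editor's note, summary of the error-to-cause mapping implemented
  above (not part of the original source):

    error_conflict_fn_violation,
    error_op_after_refresh_op      -> ROW_IN_CONFLICT
    NdbError::ConstraintViolation  -> ROW_ALREADY_EXISTS
    NdbError::NoDataFound          -> ROW_DOES_NOT_EXIST

  Temporary NDB errors while handling the conflict propagate out so the
  slave retries the whole transaction; permanent ones stop replication
  with ER_EXCEPTIONS_WRITE_ERROR.
*/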
4832 #endif /* HAVE_NDB_BINLOG */
4833 
4834 
4835 #ifdef HAVE_NDB_BINLOG
4836 /*
4837  is_serverid_local
4838 */
4839 static bool is_serverid_local(Uint32 serverid)
4840 {
4841  /*
4842  If it's not our serverid, check the
4843  IGNORE_SERVER_IDS setting to check if
4844  it's local.
4845  */
4846  return ((serverid == ::server_id) ||
4847  ndb_mi_get_ignore_server_id(serverid));
4848 }
4849 #endif
4850 
4851 int ha_ndbcluster::write_row(uchar *record)
4852 {
4853  DBUG_ENTER("ha_ndbcluster::write_row");
4854 #ifdef HAVE_NDB_BINLOG
4855  if (m_share == ndb_apply_status_share && table->in_use->slave_thread)
4856  {
4857  uint32 row_server_id, master_server_id= ndb_mi_get_master_server_id();
4858  uint64 row_epoch;
4859  memcpy(&row_server_id, table->field[0]->ptr + (record - table->record[0]),
4860  sizeof(row_server_id));
4861  memcpy(&row_epoch, table->field[1]->ptr + (record - table->record[0]),
4862  sizeof(row_epoch));
4863  g_ndb_slave_state.atApplyStatusWrite(master_server_id,
4864  row_server_id,
4865  row_epoch,
4866  is_serverid_local(row_server_id));
4867  }
4868 #endif /* HAVE_NDB_BINLOG */
4869  DBUG_RETURN(ndb_write_row(record, FALSE, FALSE));
4870 }
4871 
4875 int ha_ndbcluster::ndb_write_row(uchar *record,
4876  bool primary_key_update,
4877  bool batched_update)
4878 {
4879  bool has_auto_increment;
4880  const NdbOperation *op;
4881  THD *thd= table->in_use;
4882  Thd_ndb *thd_ndb= m_thd_ndb;
4883  NdbTransaction *trans;
4884  uint32 part_id;
4885  int error;
4886  NdbOperation::SetValueSpec sets[3];
4887  Uint32 num_sets= 0;
4888  DBUG_ENTER("ha_ndbcluster::ndb_write_row");
4889 
4890  error = check_slave_state(thd);
4891  if (unlikely(error))
4892  DBUG_RETURN(error);
4893 
4894  has_auto_increment= (table->next_number_field && record == table->record[0]);
4895 
4896  if (has_auto_increment && table_share->primary_key != MAX_KEY)
4897  {
4898  /*
4899  * Increase any auto_incremented primary key
4900  */
4901  m_skip_auto_increment= FALSE;
4902  if ((error= update_auto_increment()))
4903  DBUG_RETURN(error);
4904  m_skip_auto_increment= (insert_id_for_cur_row == 0);
4905  }
4906 
4907  /*
4908  * If IGNORE the ignore constraint violations on primary and unique keys
4909  */
4910  if (!m_use_write && m_ignore_dup_key)
4911  {
4912  /*
4913  compare if expression with that in start_bulk_insert()
4914  start_bulk_insert will set parameters to ensure that each
4915  write_row is committed individually
4916  */
4917  int peek_res= peek_indexed_rows(record, NDB_INSERT);
4918 
4919  if (!peek_res)
4920  {
4921  DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
4922  }
4923  if (peek_res != HA_ERR_KEY_NOT_FOUND)
4924  DBUG_RETURN(peek_res);
4925  }
4926 
4927  bool uses_blobs= uses_blob_value(table->write_set);
4928 
4929  Uint64 auto_value;
4930  const NdbRecord *key_rec;
4931  const uchar *key_row;
4932  if (table_share->primary_key == MAX_KEY)
4933  {
4934  /* Table has hidden primary key. */
4935  Ndb *ndb= get_ndb(thd);
4936  uint retries= NDB_AUTO_INCREMENT_RETRIES;
4937  int retry_sleep= 30; /* sleep 30 milliseconds between retries */
4938  for (;;)
4939  {
4940  Ndb_tuple_id_range_guard g(m_share);
4941  if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1000) == -1)
4942  {
4943  if (--retries && !thd->killed &&
4944  ndb->getNdbError().status == NdbError::TemporaryError)
4945  {
4946  do_retry_sleep(retry_sleep);
4947  continue;
4948  }
4949  ERR_RETURN(ndb->getNdbError());
4950  }
4951  break;
4952  }
4953  sets[num_sets].column= get_hidden_key_column();
4954  sets[num_sets].value= &auto_value;
4955  num_sets++;
4956  key_rec= m_ndb_hidden_key_record;
4957  key_row= (const uchar *)&auto_value;
4958  }
4959  else
4960  {
4961  key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
4962  key_row= record;
4963  }
4964 
4965  trans= thd_ndb->trans;
4966  if (m_user_defined_partitioning)
4967  {
4968  DBUG_ASSERT(m_use_partition_pruning);
4969  longlong func_value= 0;
4970  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
4971  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
4972  dbug_tmp_restore_column_map(table->read_set, old_map);
4973  if (unlikely(error))
4974  {
4975  m_part_info->err_value= func_value;
4976  DBUG_RETURN(error);
4977  }
4978  {
4979  /*
4980  We need to set the value of the partition function value in
4981  NDB since the NDB kernel doesn't have easy access to the function
4982  to calculate the value.
4983  */
4984  if (func_value >= INT_MAX32)
4985  func_value= INT_MAX32;
4986  sets[num_sets].column= get_partition_id_column();
4987  sets[num_sets].value= &func_value;
4988  num_sets++;
4989  }
4990  if (!trans)
4991  if (unlikely(!(trans= start_transaction_part_id(part_id, error))))
4992  DBUG_RETURN(error);
4993  }
4994  else if (!trans)
4995  {
4996  if (unlikely(!(trans= start_transaction_row(key_rec, key_row, error))))
4997  DBUG_RETURN(error);
4998  }
4999  DBUG_ASSERT(trans);
5000 
5001  ha_statistic_increment(&SSV::ha_write_count);
5002 
5003  /*
5004  Setup OperationOptions
5005  */
5006  NdbOperation::OperationOptions options;
5007  NdbOperation::OperationOptions *poptions = NULL;
5008  options.optionsPresent=0;
5009 
5010  eventSetAnyValue(thd, &options);
5011  bool need_flush= add_row_check_if_batch_full(thd_ndb);
5012 
5013  const Uint32 authorValue = 1;
5014  if ((thd->slave_thread) &&
5015  (m_table->getExtraRowAuthorBits()))
5016  {
5017  /* Set author to indicate slave updated last */
5018  sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5019  sets[num_sets].value= &authorValue;
5020  num_sets++;
5021  }
5022 
5023  if (m_user_defined_partitioning)
5024  {
5025  options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
5026  options.partitionId= part_id;
5027  }
5028  if (num_sets)
5029  {
5030  options.optionsPresent |= NdbOperation::OperationOptions::OO_SETVALUE;
5031  options.extraSetValues= sets;
5032  options.numExtraSetValues= num_sets;
5033  }
5034  if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5035  {
5036  options.optionsPresent |=
5037  NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5038  }
5039 
5040  if (options.optionsPresent != 0)
5041  poptions=&options;
5042 
5043  const Uint32 bitmapSz= (NDB_MAX_ATTRIBUTES_IN_TABLE + 31)/32;
5044  uint32 tmpBitmapSpace[bitmapSz];
5045  MY_BITMAP tmpBitmap;
5046  MY_BITMAP *user_cols_written_bitmap;
5047 #ifdef HAVE_NDB_BINLOG
5048  bool haveConflictFunction =
5049  (thd->slave_thread &&
5050  m_share->m_cfn_share &&
5051  m_share->m_cfn_share->m_conflict_fn);
5052 #endif
5053 
5054  if (m_use_write
5055 #ifdef HAVE_NDB_BINLOG
5056  /* Conflict detection must use normal Insert */
5057  && !haveConflictFunction
5058 #endif
5059  )
5060  {
5061  /* Should we use the supplied table writeset or not?
5062  * For a REPLACE command, we should ignore it, and write
5063  * all columns to get correct REPLACE behaviour.
5064  * For applying Binlog events, we need to use the writeset
5065  * to avoid trampling unchanged columns when an update is
5066  * logged as a WRITE
5067  */
5068  bool useWriteSet= isManualBinlogExec(thd);
5069 
5070 #ifdef HAVE_NDB_BINLOG
5071  /* Slave always uses writeset
5072  * TODO : What about SBR replicating a
5073  * REPLACE command?
5074  */
5075  useWriteSet |= thd->slave_thread;
5076 #endif
5077  uchar* mask;
5078 
5079  if (useWriteSet)
5080  {
5081  user_cols_written_bitmap= table->write_set;
5082  mask= (uchar *)(user_cols_written_bitmap->bitmap);
5083  }
5084  else
5085  {
5086  user_cols_written_bitmap= NULL;
5087  mask= NULL;
5088  }
5089  /* TODO : Add conflict detection etc when interpreted write supported */
5090  op= trans->writeTuple(key_rec, (const char *)key_row, m_ndb_record,
5091  (char *)record, mask,
5092  poptions, sizeof(NdbOperation::OperationOptions));
5093  }
5094  else
5095  {
5096 #ifdef HAVE_NDB_BINLOG
5097  if (haveConflictFunction)
5098  {
5099  /* Conflict detection in slave thread */
5100  if (unlikely((error = prepare_conflict_detection(WRITE_ROW,
5101  key_rec,
5102  NULL, /* old_data */
5103  record, /* new_data */
5104  NULL, /* code */
5105  &options))))
5106  DBUG_RETURN(error);
5107  }
5108 #endif
5109  uchar *mask;
5110 
5111  /* Check whether Ndb table definition includes any default values. */
5112  if (m_table->hasDefaultValues())
5113  {
5114  DBUG_PRINT("info", ("Not sending values for native defaulted columns"));
5115 
5116  /*
5117  If Ndb is unaware of the table's defaults, we must provide all column values to the insert.
5118  This is done using a NULL column mask.
5119  If Ndb is aware of the table's defaults, we only need to provide
5120  the columns explicitly mentioned in the write set,
5121  plus any extra columns required due to bug#41616,
5122  plus the primary key columns required due to bug#42238.
5123  */
5124  /*
5125  The following code for setting user_cols_written_bitmap
5126  should be removed after BUG#41616 and Bug#42238 are fixed
5127  */
5128  /* Copy table write set so that we can add to it */
5129  user_cols_written_bitmap= &tmpBitmap;
5130  bitmap_init(user_cols_written_bitmap, tmpBitmapSpace,
5131  table->write_set->n_bits, false);
5132  bitmap_copy(user_cols_written_bitmap, table->write_set);
5133 
5134  for (uint i= 0; i < table->s->fields; i++)
5135  {
5136  Field *field= table->field[i];
5137  DBUG_PRINT("info", ("Field#%u, (%u), Type : %u "
5138  "NO_DEFAULT_VALUE_FLAG : %u PRI_KEY_FLAG : %u",
5139  i,
5140  field->field_index,
5141  field->real_type(),
5142  field->flags & NO_DEFAULT_VALUE_FLAG,
5143  field->flags & PRI_KEY_FLAG));
5144  if ((field->flags & (NO_DEFAULT_VALUE_FLAG | // bug 41616
5145  PRI_KEY_FLAG)) || // bug 42238
5146  ! type_supports_default_value(field->real_type()))
5147  {
5148  bitmap_set_bit(user_cols_written_bitmap, field->field_index);
5149  }
5150  }
5151 
5152  mask= (uchar *)(user_cols_written_bitmap->bitmap);
5153  }
5154  else
5155  {
5156  /* No defaults in kernel, provide all columns ourselves */
5157  DBUG_PRINT("info", ("No native defaults, sending all values"));
5158  user_cols_written_bitmap= NULL;
5159  mask = NULL;
5160  }
5161 
5162  /* Using insert, we write all non default columns */
5163  op= trans->insertTuple(key_rec, (const char *)key_row, m_ndb_record,
5164  (char *)record, mask, // Default value should be masked
5165  poptions, sizeof(NdbOperation::OperationOptions));
5166  }
5167  if (!(op))
5168  ERR_RETURN(trans->getNdbError());
5169 
5170  bool do_batch= !need_flush &&
5171  (batched_update || thd_allow_batch(thd));
5172  uint blob_count= 0;
5173  if (table_share->blob_fields > 0)
5174  {
5175  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
5176  /* Set Blob values for all columns updated by the operation */
5177  int res= set_blob_values(op, record - table->record[0],
5178  user_cols_written_bitmap, &blob_count, do_batch);
5179  dbug_tmp_restore_column_map(table->read_set, old_map);
5180  if (res != 0)
5181  DBUG_RETURN(res);
5182  }
5183 
5184  m_rows_changed++;
5185 
5186  /*
5187  Execute write operation
5188  NOTE When doing inserts with many values in
5189  each INSERT statement it should not be necessary
5190  to NoCommit the transaction between each row.
5191  Find out how this is detected!
5192  */
5193  m_rows_inserted++;
5194  no_uncommitted_rows_update(1);
5195  if (( (m_rows_to_insert == 1 || uses_blobs) && !do_batch ) ||
5196  primary_key_update ||
5197  need_flush)
5198  {
5199  int res= flush_bulk_insert();
5200  if (res != 0)
5201  {
5202  m_skip_auto_increment= TRUE;
5203  DBUG_RETURN(res);
5204  }
5205  }
5206  if ((has_auto_increment) && (m_skip_auto_increment))
5207  {
5208  int ret_val;
5209  if ((ret_val= set_auto_inc(thd, table->next_number_field)))
5210  {
5211  DBUG_RETURN(ret_val);
5212  }
5213  }
5214  m_skip_auto_increment= TRUE;
5215 
5216  DBUG_PRINT("exit",("ok"));
5217  DBUG_RETURN(0);
5218 }
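
/*
  Editor's note, recap of the flush decision above (summary only): the
  queued write is executed immediately when the batch is full, when
  this write is part of a primary key update, or when batching is off
  and the statement inserts a single row or touches blobs. Otherwise it
  stays buffered for a later execute()/commit round trip, which is what
  makes multi-row INSERTs cheap.
*/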
5219 
5220 
5221 /* Compare if an update changes the primary key in a row. */
5222 int ha_ndbcluster::primary_key_cmp(const uchar * old_row, const uchar * new_row)
5223 {
5224  uint keynr= table_share->primary_key;
5225  KEY_PART_INFO *key_part=table->key_info[keynr].key_part;
5226  KEY_PART_INFO *end=key_part+table->key_info[keynr].user_defined_key_parts;
5227 
5228  for (; key_part != end ; key_part++)
5229  {
5230  if (!bitmap_is_set(table->write_set, key_part->fieldnr - 1))
5231  continue;
5232 
5233  /* The primary key does not allow NULLs. */
5234  DBUG_ASSERT(!key_part->null_bit);
5235 
5236  if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
5237  {
5238 
5239  if (key_part->field->cmp_binary((old_row + key_part->offset),
5240  (new_row + key_part->offset),
5241  (ulong) key_part->length))
5242  return 1;
5243  }
5244  else
5245  {
5246  if (memcmp(old_row+key_part->offset, new_row+key_part->offset,
5247  key_part->length))
5248  return 1;
5249  }
5250  }
5251  return 0;
5252 }
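
/*
  Editor's note, an illustrative case (assumed schema, not part of the
  original source): with PRIMARY KEY (id INT, name VARCHAR(10)), an
  UPDATE that changes 'name' compares via cmp_binary() (the
  HA_VAR_LENGTH_PART branch) and returns 1, sending the caller down the
  special PK-update path (ndb_pk_update_row). An UPDATE that lists a
  key column in write_set but stores identical bytes returns 0 and
  takes the cheap in-place update path.
*/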
5253 
5254 #ifdef HAVE_NDB_BINLOG
5255 int
5256 handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
5257  const char* table_name,
5258  const NdbRecord* key_rec,
5259  const uchar* pk_row,
5260  enum_conflicting_op_type op_type,
5261  enum_conflict_cause conflict_cause,
5262  const NdbError& conflict_error,
5263  NdbTransaction* conflict_trans,
5264  NdbError& err)
5265 {
5266  DBUG_ENTER("handle_row_conflict");
5267 
5268  if (cfn_share->m_flags & CFF_REFRESH_ROWS)
5269  {
5270  /* A conflict has been detected between an applied replicated operation
5271  * and the data in the DB.
5272  * The attempt to change the local DB will have been rejected.
5273  * We now take steps to generate a refresh Binlog event so that
5274  * other clusters will be re-aligned.
5275  */
5276  DBUG_PRINT("info", ("Conflict on table %s. Operation type : %s, "
5277  "conflict cause :%s, conflict error : %u : %s",
5278  table_name,
5279  ((op_type == WRITE_ROW)? "WRITE_ROW":
5280  (op_type == UPDATE_ROW)? "UPDATE_ROW":
5281  "DELETE_ROW"),
5282  ((conflict_cause == ROW_ALREADY_EXISTS)?"ROW_ALREADY_EXISTS":
5283  (conflict_cause == ROW_DOES_NOT_EXIST)?"ROW_DOES_NOT_EXIST":
5284  "ROW_IN_CONFLICT"),
5285  conflict_error.code,
5286  conflict_error.message));
5287 
5288  assert(key_rec != NULL);
5289  assert(pk_row != NULL);
5290 
5291  /* When the slave splits an epoch into batches, a conflict row detected
5292  * and refreshed in an early batch can be written to by operations in
5293  * a later batch. As the operations will not have applied, and the
5294  * row has already been refreshed, we need not attempt to refresh
5295  * it again
5296  */
5297  if ((conflict_cause == ROW_IN_CONFLICT) &&
5298  (conflict_error.code == (int) error_op_after_refresh_op))
5299  {
5300  /* Attempt to apply an operation after the row was refreshed
5301  * Ignore the error
5302  */
5303  DBUG_PRINT("info", ("Operation after refresh error - ignoring"));
5304  DBUG_RETURN(0);
5305  }
5306 
5307  /* When a delete operation finds that the row does not exist, it indicates
5308  * a DELETE vs DELETE conflict. If we refresh the row then we can get
5309  * non deterministic behaviour depending on slave batching as follows :
5310  * Row is deleted
5311  *
5312  * Case 1
5313  * Slave applied DELETE, INSERT in 1 batch
5314  *
5315  * After first batch, the row is present (due to INSERT), it is
5316  * refreshed.
5317  *
5318  * Case 2
5319  * Slave applied DELETE in 1 batch, INSERT in 2nd batch
5320  *
5321  * After first batch, the row is not present, it is refreshed
5322  * INSERT is then rejected.
5323  *
5324  * The problem of not being able to 'record' a DELETE vs DELETE conflict
5325  * is known. We attempt at least to give consistent behaviour for
5326  * DELETE vs DELETE conflicts by :
5327  * NOT refreshing a row when a DELETE vs DELETE conflict is detected
5328  * This should map all batching scenarios onto Case1.
5329  */
5330  if ((op_type == DELETE_ROW) &&
5331  (conflict_cause == ROW_DOES_NOT_EXIST))
5332  {
5333  DBUG_PRINT("info", ("Delete vs Delete detected, NOT refreshing"));
5334  DBUG_RETURN(0);
5335  }
5336 
5337  /* Create a refresh operation to realign other clusters */
5338  // TODO AnyValue
5339  // TODO Do we ever get non-PK key?
5340  // Keyless table?
5341  // Unique index
5342  const NdbOperation* refresh_op= conflict_trans->refreshTuple(key_rec,
5343  (const char*) pk_row);
5344 
5345  if (!refresh_op)
5346  {
5347  err= conflict_trans->getNdbError();
5348  DBUG_RETURN(1);
5349  }
5350  } /* if (cfn_share->m_flags & CFF_REFRESH_ROWS) */
5351 
5352  DBUG_RETURN(0);
5353 }
5354 #endif /* HAVE_NDB_BINLOG */
5355 
5360 bool ha_ndbcluster::start_bulk_update()
5361 {
5362  DBUG_ENTER("ha_ndbcluster::start_bulk_update");
5363  if (!m_use_write && m_ignore_dup_key)
5364  {
5365  DBUG_PRINT("info", ("Batching turned off as duplicate key is "
5366  "ignored by using peek_row"));
5367  DBUG_RETURN(TRUE);
5368  }
5369  DBUG_RETURN(FALSE);
5370 }
5371 
5372 int ha_ndbcluster::bulk_update_row(const uchar *old_data, uchar *new_data,
5373  uint *dup_key_found)
5374 {
5375  DBUG_ENTER("ha_ndbcluster::bulk_update_row");
5376  *dup_key_found= 0;
5377  DBUG_RETURN(ndb_update_row(old_data, new_data, 1));
5378 }
5379 
5380 int ha_ndbcluster::exec_bulk_update(uint *dup_key_found)
5381 {
5382  NdbTransaction* trans= m_thd_ndb->trans;
5383  DBUG_ENTER("ha_ndbcluster::exec_bulk_update");
5384  *dup_key_found= 0;
5385 
5386  // m_handler must be NULL or point to _this_ handler instance
5387  assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
5388 
5389  if (m_thd_ndb->m_handler &&
5390  m_read_before_write_removal_possible)
5391  {
5392  /*
5393  This is an autocommit involving only one table and rbwr is on
5394 
5395  Commit the autocommit transaction early(before the usual place
5396  in ndbcluster_commit) in order to:
5397  1) save one round trip, "no-commit+commit" converted to "commit"
5398  2) return the correct number of updated and affected rows
5399  to the update loop(which will ask handler in rbwr mode)
5400  */
5401  DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
5402  uint ignore_count= 0;
5403  const int ignore_error= 1;
5404  if (execute_commit(table->in_use, m_thd_ndb, trans,
5405  m_thd_ndb->m_force_send, ignore_error,
5406  &ignore_count) != 0)
5407  {
5408  no_uncommitted_rows_execute_failure();
5409  DBUG_RETURN(ndb_err(trans));
5410  }
5411  DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
5412  assert(m_rows_changed >= ignore_count);
5413  assert(m_rows_updated >= ignore_count);
5414  m_rows_changed-= ignore_count;
5415  m_rows_updated-= ignore_count;
5416  DBUG_RETURN(0);
5417  }
5418 
5419  if (m_thd_ndb->m_unsent_bytes == 0)
5420  {
5421  DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
5422  DBUG_RETURN(0);
5423  }
5424 
5425  if (thd_allow_batch(table->in_use))
5426  {
5427  /*
5428  Turned on by @@transaction_allow_batching=ON
5429  or implicitly by slave exec thread
5430  */
5431  DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
5432  DBUG_RETURN(0);
5433  }
5434 
5435  if (m_thd_ndb->m_handler &&
5436  !m_blobs_pending)
5437  {
5438  // Execute at commit time(in 'ndbcluster_commit') to save a round trip
5439  DBUG_PRINT("exit", ("skip execute - simple autocommit"));
5440  DBUG_RETURN(0);
5441  }
5442 
5443  uint ignore_count= 0;
5444  if (execute_no_commit(m_thd_ndb, trans,
5445  m_ignore_no_key || m_read_before_write_removal_used,
5446  &ignore_count) != 0)
5447  {
5448  no_uncommitted_rows_execute_failure();
5449  DBUG_RETURN(ndb_err(trans));
5450  }
5451  assert(m_rows_changed >= ignore_count);
5452  assert(m_rows_updated >= ignore_count);
5453  m_rows_changed-= ignore_count;
5454  m_rows_updated-= ignore_count;
5455  DBUG_RETURN(0);
5456 }
5457 
5458 void ha_ndbcluster::end_bulk_update()
5459 {
5460  DBUG_ENTER("ha_ndbcluster::end_bulk_update");
5461  DBUG_VOID_RETURN;
5462 }
5463 
5464 int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data)
5465 {
5466  return ndb_update_row(old_data, new_data, 0);
5467 }
5468 
5469 void
5470 ha_ndbcluster::setup_key_ref_for_ndb_record(const NdbRecord **key_rec,
5471  const uchar **key_row,
5472  const uchar *record,
5473  bool use_active_index)
5474 {
5475  DBUG_ENTER("setup_key_ref_for_ndb_record");
5476  if (use_active_index)
5477  {
5478  /* Use unique key to access table */
5479  DBUG_PRINT("info", ("Using unique index (%u)", active_index));
5480  *key_rec= m_index[active_index].ndb_unique_record_row;
5481  *key_row= record;
5482  }
5483  else if (table_share->primary_key != MAX_KEY)
5484  {
5485  /* Use primary key to access table */
5486  DBUG_PRINT("info", ("Using primary key"));
5487  *key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
5488  *key_row= record;
5489  }
5490  else
5491  {
5492  /* Use hidden primary key previously read into m_ref. */
5493  DBUG_PRINT("info", ("Using hidden primary key (%llu)", m_ref));
5494  /* Can't use hidden pk if we didn't read it first */
5495  DBUG_ASSERT(m_read_before_write_removal_used == false);
5496  *key_rec= m_ndb_hidden_key_record;
5497  *key_row= (const uchar *)(&m_ref);
5498  }
5499  DBUG_VOID_RETURN;
5500 }
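
/*
  Editor's note, the key-source selection above in brief (summary only):

    unique index in use    -> m_index[active_index].ndb_unique_record_row, key from 'record'
    table with explicit PK -> m_index[primary_key].ndb_unique_record_row, key from 'record'
    hidden-PK table        -> m_ndb_hidden_key_record, key from m_ref read earlier
*/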
5501 
5502 
5503 /*
5504  Update one record in NDB using primary key
5505 */
5506 
5507 int ha_ndbcluster::ndb_update_row(const uchar *old_data, uchar *new_data,
5508  int is_bulk_update)
5509 {
5510  THD *thd= table->in_use;
5511  Thd_ndb *thd_ndb= m_thd_ndb;
5512  NdbTransaction *trans= thd_ndb->trans;
5513  NdbScanOperation* cursor= m_active_cursor;
5514  const NdbOperation *op;
5515  uint32 old_part_id= ~uint32(0), new_part_id= ~uint32(0);
5516  int error;
5517  longlong func_value;
5518  Uint32 func_value_uint32;
5519  bool have_pk= (table_share->primary_key != MAX_KEY);
5520  bool pk_update= (!m_read_before_write_removal_possible &&
5521  have_pk &&
5522  bitmap_is_overlapping(table->write_set, m_pk_bitmap_p) &&
5523  primary_key_cmp(old_data, new_data));
5524  bool batch_allowed= !m_update_cannot_batch &&
5525  (is_bulk_update || thd_allow_batch(thd));
5526  NdbOperation::SetValueSpec sets[2];
5527  Uint32 num_sets= 0;
5528 
5529  DBUG_ENTER("ndb_update_row");
5530  DBUG_ASSERT(trans);
5531 
5532  error = check_slave_state(thd);
5533  if (unlikely(error))
5534  DBUG_RETURN(error);
5535 
5536  /*
5537  * If IGNORE the ignore constraint violations on primary and unique keys,
5538  * but check that it is not part of INSERT ... ON DUPLICATE KEY UPDATE
5539  */
5540  if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE ||
5541  thd->lex->sql_command == SQLCOM_UPDATE_MULTI))
5542  {
5543  NDB_WRITE_OP write_op= (pk_update) ? NDB_PK_UPDATE : NDB_UPDATE;
5544  int peek_res= peek_indexed_rows(new_data, write_op);
5545 
5546  if (!peek_res)
5547  {
5548  DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
5549  }
5550  if (peek_res != HA_ERR_KEY_NOT_FOUND)
5551  DBUG_RETURN(peek_res);
5552  }
5553 
5554  ha_statistic_increment(&SSV::ha_update_count);
5555 
5556  bool skip_partition_for_unique_index= FALSE;
5557  if (m_use_partition_pruning)
5558  {
5559  if (!cursor && m_read_before_write_removal_used)
5560  {
5561  ndb_index_type type= get_index_type(active_index);
5562  /*
5563  Ndb unique indexes are global so when
5564  m_read_before_write_removal_used is active
5565  the unique index can be used directly for update
5566  without finding the partitions
5567  */
5568  if (type == UNIQUE_INDEX ||
5569  type == UNIQUE_ORDERED_INDEX)
5570  {
5571  skip_partition_for_unique_index= TRUE;
5572  goto skip_partition_pruning;
5573  }
5574  }
5575  if ((error= get_parts_for_update(old_data, new_data, table->record[0],
5576  m_part_info, &old_part_id, &new_part_id,
5577  &func_value)))
5578  {
5579  m_part_info->err_value= func_value;
5580  DBUG_RETURN(error);
5581  }
5582  DBUG_PRINT("info", ("old_part_id: %u new_part_id: %u", old_part_id, new_part_id));
5583  skip_partition_pruning:
5584  (void)0;
5585  }
5586 
5587  /*
5588  * Check for update of primary key or partition change
5589  * for special handling
5590  */
5591  if (pk_update || old_part_id != new_part_id)
5592  {
5593  DBUG_RETURN(ndb_pk_update_row(thd, old_data, new_data, old_part_id));
5594  }
5595  /*
5596  If we are updating a unique key with auto_increment
5597  then we need to update the auto_increment counter
5598  */
5599  if (table->found_next_number_field &&
5600  bitmap_is_set(table->write_set,
5601  table->found_next_number_field->field_index) &&
5602  (error= set_auto_inc(thd, table->found_next_number_field)))
5603  {
5604  DBUG_RETURN(error);
5605  }
5606  /*
5607  Set only non-primary-key attributes.
5608  We already checked that any primary key attribute in write_set has no
5609  real changes.
5610  */
5611  bitmap_copy(&m_bitmap, table->write_set);
5612  bitmap_subtract(&m_bitmap, m_pk_bitmap_p);
5613  uchar *mask= (uchar *)(m_bitmap.bitmap);
5614  DBUG_ASSERT(!pk_update);
5615 
5616  NdbOperation::OperationOptions *poptions = NULL;
5617  NdbOperation::OperationOptions options;
5618  options.optionsPresent=0;
5619 
5620  /* Need to set the value of any user-defined partitioning function.
5621  (except when using a unique index)
5622  */
5623  if (m_user_defined_partitioning && !skip_partition_for_unique_index)
5624  {
5625  if (func_value >= INT_MAX32)
5626  func_value_uint32= INT_MAX32;
5627  else
5628  func_value_uint32= (uint32)func_value;
5629  sets[num_sets].column= get_partition_id_column();
5630  sets[num_sets].value= &func_value_uint32;
5631  num_sets++;
5632 
5633  if (!cursor)
5634  {
5635  options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
5636  options.partitionId= new_part_id;
5637  }
5638  }
5639 
5640  eventSetAnyValue(thd, &options);
5641 
5642  bool need_flush= add_row_check_if_batch_full(thd_ndb);
5643 
5644  const Uint32 authorValue = 1;
5645  if ((thd->slave_thread) &&
5646  (m_table->getExtraRowAuthorBits()))
5647  {
5648  /* Set author to indicate slave updated last */
5649  sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5650  sets[num_sets].value= &authorValue;
5651  num_sets++;
5652  }
5653 
5654  if (num_sets)
5655  {
5656  options.optionsPresent|= NdbOperation::OperationOptions::OO_SETVALUE;
5657  options.extraSetValues= sets;
5658  options.numExtraSetValues= num_sets;
5659  }
5660 
5661  if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5662  {
5663  options.optionsPresent |=
5664  NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5665  }
5666 
5667  if (cursor)
5668  {
5669  /*
5670  We are scanning records and want to update the record
5671  that was just found. Call updateCurrentTuple on the cursor
5672  to transfer its lock to a new update operation, which also
5673  sets the new operation's primary key from the active
5674  record in the cursor.
5675  */
5676  DBUG_PRINT("info", ("Calling updateTuple on cursor, write_set=0x%x",
5677  table->write_set->bitmap[0]));
5678 
5679  if (options.optionsPresent != 0)
5680  poptions = &options;
5681 
5682  if (!(op= cursor->updateCurrentTuple(trans, m_ndb_record,
5683  (const char*)new_data, mask,
5684  poptions,
5685  sizeof(NdbOperation::OperationOptions))))
5686  ERR_RETURN(trans->getNdbError());
5687 
5688  m_lock_tuple= FALSE;
5689  thd_ndb->m_unsent_bytes+= 12;
5690  }
5691  else
5692  {
5693  const NdbRecord *key_rec;
5694  const uchar *key_row;
5695  setup_key_ref_for_ndb_record(&key_rec, &key_row, new_data,
5696  m_read_before_write_removal_used);
5697 
5698 #ifdef HAVE_NDB_BINLOG
5699  Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
5700  NdbInterpretedCode code(m_table, buffer,
5701  sizeof(buffer)/sizeof(buffer[0]));
5702 
5703  if (thd->slave_thread && m_share->m_cfn_share &&
5704  m_share->m_cfn_share->m_conflict_fn)
5705  {
5706  /* Conflict resolution in slave thread. */
5707  if (unlikely((error = prepare_conflict_detection(UPDATE_ROW,
5708  key_rec,
5709  old_data,
5710  new_data,
5711  &code,
5712  &options))))
5713  DBUG_RETURN(error);
5714  }
5715 #endif /* HAVE_NDB_BINLOG */
5716  if (options.optionsPresent !=0)
5717  poptions= &options;
5718 
5719  if (!(op= trans->updateTuple(key_rec, (const char *)key_row,
5720  m_ndb_record, (const char*)new_data, mask,
5721  poptions,
5722  sizeof(NdbOperation::OperationOptions))))
5723  ERR_RETURN(trans->getNdbError());
5724  }
5725 
5726  uint blob_count= 0;
5727  if (uses_blob_value(table->write_set))
5728  {
5729  int row_offset= new_data - table->record[0];
5730  int res= set_blob_values(op, row_offset, table->write_set, &blob_count,
5731  (batch_allowed && !need_flush));
5732  if (res != 0)
5733  DBUG_RETURN(res);
5734  }
5735  uint ignore_count= 0;
5736  /*
5737  Batch update operation if we are doing a scan for update, unless
5738  there exist UPDATE AFTER triggers
5739  */
5740  if (m_update_cannot_batch ||
5741  !(cursor || (batch_allowed && have_pk)) ||
5742  need_flush)
5743  {
5744  if (execute_no_commit(m_thd_ndb, trans,
5745  m_ignore_no_key || m_read_before_write_removal_used,
5746  &ignore_count) != 0)
5747  {
5748  no_uncommitted_rows_execute_failure();
5749  DBUG_RETURN(ndb_err(trans));
5750  }
5751  }
5752  else if (blob_count > 0)
5753  m_blobs_pending= TRUE;
5754 
5755  m_rows_changed++;
5756  m_rows_updated++;
5757 
5758  assert(m_rows_changed >= ignore_count);
5759  assert(m_rows_updated >= ignore_count);
5760  m_rows_changed-= ignore_count;
5761  m_rows_updated-= ignore_count;
5762 
5763  DBUG_RETURN(0);
5764 }
5765 
5766 
5767 /*
5768  handler delete interface
5769 */
5770 
5771 int ha_ndbcluster::delete_row(const uchar *record)
5772 {
5773  return ndb_delete_row(record, FALSE);
5774 }
5775 
5776 bool ha_ndbcluster::start_bulk_delete()
5777 {
5778  DBUG_ENTER("start_bulk_delete");
5779  m_is_bulk_delete = true;
5780  DBUG_RETURN(0); // Bulk delete used by handler
5781 }
5782 
5783 int ha_ndbcluster::end_bulk_delete()
5784 {
5785  NdbTransaction* trans= m_thd_ndb->trans;
5786  DBUG_ENTER("end_bulk_delete");
5787  assert(m_is_bulk_delete); // Don't allow end() without start()
5788  m_is_bulk_delete = false;
5789 
5790  // m_handler must be NULL or point to _this_ handler instance
5791  assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
5792 
5793  if (m_thd_ndb->m_handler &&
5794  m_read_before_write_removal_possible)
5795  {
5796  /*
5797  This is an autocommit involving only one table and rbwr is on
5798 
5799  Commit the autocommit transaction early(before the usual place
5800  in ndbcluster_commit) in order to:
5801  1) save one round trip, "no-commit+commit" converted to "commit"
5802  2) return the correct number of updated and affected rows
5803  to the delete loop(which will ask handler in rbwr mode)
5804  */
5805  DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
5806  uint ignore_count= 0;
5807  const int ignore_error= 1;
5808  if (execute_commit(table->in_use, m_thd_ndb, trans,
5809  m_thd_ndb->m_force_send, ignore_error,
5810  &ignore_count) != 0)
5811  {
5812  no_uncommitted_rows_execute_failure();
5813  DBUG_RETURN(ndb_err(trans));
5814  }
5815  DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
5816  assert(m_rows_deleted >= ignore_count);
5817  m_rows_deleted-= ignore_count;
5818  DBUG_RETURN(0);
5819  }
5820 
5821  if (m_thd_ndb->m_unsent_bytes == 0)
5822  {
5823  DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
5824  DBUG_RETURN(0);
5825  }
5826 
5827  if (thd_allow_batch(table->in_use))
5828  {
5829  /*
5830  Turned on by @@transaction_allow_batching=ON
5831  or implicitly by slave exec thread
5832  */
5833  DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
5834  DBUG_RETURN(0);
5835  }
5836 
5837  if (m_thd_ndb->m_handler)
5838  {
5839  // Execute at commit time(in 'ndbcluster_commit') to save a round trip
5840  DBUG_PRINT("exit", ("skip execute - simple autocommit"));
5841  DBUG_RETURN(0);
5842  }
5843 
5844  uint ignore_count= 0;
5845  if (execute_no_commit(m_thd_ndb, trans,
5846  m_ignore_no_key || m_read_before_write_removal_used,
5847  &ignore_count) != 0)
5848  {
5849  no_uncommitted_rows_execute_failure();
5850  DBUG_RETURN(ndb_err(trans));
5851  }
5852 
5853  assert(m_rows_deleted >= ignore_count);
5854  m_rows_deleted-= ignore_count;
5855  no_uncommitted_rows_update(ignore_count);
5856  DBUG_RETURN(0);
5857 }
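/*
  Sketch (hypothetical driver code, illustration only, not compiled): how
  the server drives a multi-row DELETE through the bulk delete interface.
  Each delete_row() is queued; the batch is flushed with one execute,
  saving one round trip per row.
*/
#if 0
  h->start_bulk_delete();              // enable batching of deletes
  while (h->rnd_next(record) == 0)
    h->delete_row(record);             // queued, no round trip per row
  h->end_bulk_delete();                // one execute flushes the batch
#endif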
5858 
5859 
5860 /**
5861  Delete one record from NDB, using the primary key.
5862 */
5863 
5864 int ha_ndbcluster::ndb_delete_row(const uchar *record,
5865  bool primary_key_update)
5866 {
5867  THD *thd= table->in_use;
5868  Thd_ndb *thd_ndb= get_thd_ndb(thd);
5869  NdbTransaction *trans= m_thd_ndb->trans;
5870  NdbScanOperation* cursor= m_active_cursor;
5871  const NdbOperation *op;
5872  uint32 part_id= ~uint32(0);
5873  int error;
5874  bool allow_batch= !m_delete_cannot_batch &&
5875  (m_is_bulk_delete || thd_allow_batch(thd));
5876 
5877  DBUG_ENTER("ndb_delete_row");
5878  DBUG_ASSERT(trans);
5879 
5880  error = check_slave_state(thd);
5881  if (unlikely(error))
5882  DBUG_RETURN(error);
5883 
5884  ha_statistic_increment(&SSV::ha_delete_count);
5885  m_rows_changed++;
5886 
5887  bool skip_partition_for_unique_index= FALSE;
5888  if (m_use_partition_pruning)
5889  {
5890  if (!cursor && m_read_before_write_removal_used)
5891  {
5892  ndb_index_type type= get_index_type(active_index);
5893  /*
5894  NDB unique indexes are global, so when
5895  m_read_before_write_removal_used is active
5896  the unique index can be used directly for deleting,
5897  without first finding the partitions
5898  */
5899  if (type == UNIQUE_INDEX ||
5900  type == UNIQUE_ORDERED_INDEX)
5901  {
5902  skip_partition_for_unique_index= TRUE;
5903  goto skip_partition_pruning;
5904  }
5905  }
5906  if ((error= get_part_for_delete(record, table->record[0], m_part_info,
5907  &part_id)))
5908  {
5909  DBUG_RETURN(error);
5910  }
5911  skip_partition_pruning:
5912  (void)0;
5913  }
5914 
5915  NdbOperation::OperationOptions options;
5916  NdbOperation::OperationOptions *poptions = NULL;
5917  options.optionsPresent=0;
5918 
5919  eventSetAnyValue(thd, &options);
5920 
5921  /*
5922  Rough size estimate: a delete costs ~12 bytes of overhead plus about a quarter of the average row size (m_bytes_per_write / 4)
5923  */
5924  uint delete_size= 12 + (m_bytes_per_write >> 2);
5925  bool need_flush= add_row_check_if_batch_full_size(thd_ndb, delete_size);
5926 
5927  if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5928  {
5929  options.optionsPresent |=
5930  NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5931  }
5932 
5933  if (cursor)
5934  {
5935  if (options.optionsPresent != 0)
5936  poptions = &options;
5937 
5938  /*
5939  We are scanning records and want to delete the record
5940  that was just found: call deleteTuple on the cursor
5941  to take over the lock into a new delete operation,
5942  which thus gets the primary key of the record from
5943  the active record in the cursor
5944  */
5945  DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
5946  if ((op = cursor->deleteCurrentTuple(trans, m_ndb_record,
5947  NULL, // result_row
5948  NULL, // result_mask
5949  poptions,
5950  sizeof(NdbOperation::OperationOptions))) == 0)
5951  ERR_RETURN(trans->getNdbError());
5952  m_lock_tuple= FALSE;
5953  thd_ndb->m_unsent_bytes+= 12;
5954 
5955  no_uncommitted_rows_update(-1);
5956  m_rows_deleted++;
5957 
5958  if (!(primary_key_update || m_delete_cannot_batch))
5959  {
5960  // If deleting from cursor, NoCommit will be handled in next_result
5961  DBUG_RETURN(0);
5962  }
5963  }
5964  else
5965  {
5966  const NdbRecord *key_rec;
5967  const uchar *key_row;
5968 
5969  if (m_user_defined_partitioning && !skip_partition_for_unique_index)
5970  {
5971  options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
5972  options.partitionId= part_id;
5973  }
5974 
5975  setup_key_ref_for_ndb_record(&key_rec, &key_row, record,
5976  m_read_before_write_removal_used);
5977 
5978 #ifdef HAVE_NDB_BINLOG
5979  Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
5980  NdbInterpretedCode code(m_table, buffer,
5981  sizeof(buffer)/sizeof(buffer[0]));
5982  if (thd->slave_thread && m_share->m_cfn_share &&
5983  m_share->m_cfn_share->m_conflict_fn)
5984  {
5985  /* Conflict resolution in slave thread. */
5986  if (unlikely((error = prepare_conflict_detection(DELETE_ROW,
5987  key_rec,
5988  key_row, /* old_data */
5989  NULL, /* new_data */
5990  &code,
5991  &options))))
5992  DBUG_RETURN(error);
5993  }
5994 #endif /* HAVE_NDB_BINLOG */
5995  if (options.optionsPresent != 0)
5996  poptions= &options;
5997 
5998  if (!(op=trans->deleteTuple(key_rec, (const char *)key_row,
5999  m_ndb_record,
6000  NULL, // row
6001  NULL, // mask
6002  poptions,
6003  sizeof(NdbOperation::OperationOptions))))
6004  ERR_RETURN(trans->getNdbError());
6005 
6006  no_uncommitted_rows_update(-1);
6007  m_rows_deleted++;
6008 
6009  /*
6010  Check if we can batch the delete.
6011 
6012  We don't batch deletes as part of primary key updates.
6013  We do not batch deletes on tables with no primary key. For such tables,
6014  replication uses a full table scan to locate the row to delete. The
6015  problem is the following scenario when deleting 2 (or more) rows:
6016 
6017  1. Table scan to locate the first row.
6018  2. Delete the row, batched so no execute.
6019  3. Table scan to locate the second row is executed, along with the
6020  batched delete operation from step 2.
6021  4. The first row is returned from nextResult() (not deleted yet).
6022  5. The kernel deletes the row (operation from step 2).
6023  6. lockCurrentTuple() is called on the row returned in step 4. However,
6024  as that row is now deleted, the operation fails and the transaction
6025  is aborted.
6026  7. The delete of the second tuple now fails, as the transaction has
6027  been aborted.
6028  */
6029 
6030  if ( allow_batch &&
6031  table_share->primary_key != MAX_KEY &&
6032  !primary_key_update &&
6033  !need_flush)
6034  {
6035  DBUG_RETURN(0);
6036  }
6037  }
6038 
6039  // Execute delete operation
6040  uint ignore_count= 0;
6041  if (execute_no_commit(m_thd_ndb, trans,
6042  m_ignore_no_key || m_read_before_write_removal_used,
6043  &ignore_count) != 0)
6044  {
6045  no_uncommitted_rows_execute_failure();
6046  DBUG_RETURN(ndb_err(trans));
6047  }
6048  if (!primary_key_update)
6049  {
6050  assert(m_rows_deleted >= ignore_count);
6051  m_rows_deleted-= ignore_count;
6052  no_uncommitted_rows_update(ignore_count);
6053  }
6054  DBUG_RETURN(0);
6055 }
6056 
6057 /**
6058  Unpack a record returned from a scan into the destination row.
6059  We copy field by field in order to
6060  1) avoid unnecessary copying for sorted index scans, and
6061  2) properly initialize unused null bits.
6062 */
6063 
6064 
6065 void ha_ndbcluster::unpack_record(uchar *dst_row, const uchar *src_row)
6066 {
6067  int res;
6068  DBUG_ASSERT(src_row != NULL);
6069 
6070  my_ptrdiff_t dst_offset= dst_row - table->record[0];
6071  my_ptrdiff_t src_offset= src_row - table->record[0];
6072 
6073  /* Initialize the NULL bitmap. */
6074  memset(dst_row, 0xff, table->s->null_bytes);
6075 
6076  uchar *blob_ptr= m_blobs_buffer;
6077 
6078  for (uint i= 0; i < table_share->fields; i++)
6079  {
6080  Field *field= table->field[i];
6081  if (bitmap_is_set(table->read_set, i))
6082  {
6083  if (field->type() == MYSQL_TYPE_BIT)
6084  {
6085  Field_bit *field_bit= static_cast<Field_bit*>(field);
6086  if (!field->is_real_null(src_offset))
6087  {
6088  field->move_field_offset(src_offset);
6089  longlong value= field_bit->val_int();
6090  field->move_field_offset(dst_offset-src_offset);
6091  field_bit->set_notnull();
6092  /* Field_bit in DBUG requires the bit set in write_set for store(). */
6093  my_bitmap_map *old_map=
6094  dbug_tmp_use_all_columns(table, table->write_set);
6095  int res = field_bit->store(value, true);
6096  assert(res == 0); NDB_IGNORE_VALUE(res);
6097  dbug_tmp_restore_column_map(table->write_set, old_map);
6098  field->move_field_offset(-dst_offset);
6099  }
6100  }
6101  else if (field->flags & BLOB_FLAG)
6102  {
6103  Field_blob *field_blob= (Field_blob *)field;
6104  NdbBlob *ndb_blob= m_value[i].blob;
6105  /* unpack_record *only* called for scan result processing
6106  * *while* the scan is open and the Blob is active.
6107  * Verify Blob state to be certain.
6108  * Accessing PK/UK op Blobs after execute() is unsafe
6109  */
6110  DBUG_ASSERT(ndb_blob != 0);
6111  DBUG_ASSERT(ndb_blob->getState() == NdbBlob::Active);
6112  int isNull;
6113  res= ndb_blob->getNull(isNull);
6114  DBUG_ASSERT(res == 0); // Already succeeded once
6115  Uint64 len64= 0;
6116  field_blob->move_field_offset(dst_offset);
6117  if (!isNull)
6118  {
6119  res= ndb_blob->getLength(len64);
6120  DBUG_ASSERT(res == 0 && len64 <= (Uint64)0xffffffff);
6121  field->set_notnull();
6122  }
6123  /* Need not set_null(), as we initialized null bits to 1 above. */
6124  field_blob->set_ptr((uint32)len64, blob_ptr);
6125  field_blob->move_field_offset(-dst_offset);
6126  blob_ptr+= (len64 + 7) & ~((Uint64)7);
6127  }
6128  else
6129  {
6130  field->move_field_offset(src_offset);
6131  /* Normal field (not blob or bit type). */
6132  if (!field->is_null())
6133  {
6134  /* Only copy actually used bytes of varstrings. */
6135  uint32 actual_length= field_used_length(field);
6136  uchar *src_ptr= field->ptr;
6137  field->move_field_offset(dst_offset - src_offset);
6138  field->set_notnull();
6139  memcpy(field->ptr, src_ptr, actual_length);
6140 #ifdef HAVE_purify
6141  /*
6142  We get Valgrind warnings on uninitialised padding bytes in
6143  varstrings, for example when writing rows to temporary tables.
6144  So for valgrind builds we pad with zeros, not needed for
6145  production code.
6146  */
6147  if (actual_length < field->pack_length())
6148  memset(field->ptr + actual_length, 0,
6149  field->pack_length() - actual_length);
6150 #endif
6151  field->move_field_offset(-dst_offset);
6152  }
6153  else
6154  field->move_field_offset(-src_offset);
6155  /* No action needed for a NULL field. */
6156  }
6157  }
6158  }
6159 }
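/*
  Sketch (illustration only): blob data is packed back-to-back in
  m_blobs_buffer with each blob rounded up to 8-byte alignment, which is
  what the expression (len64 + 7) & ~7 above computes.
*/
#if 0
static inline Uint64 blob_buf_align8(Uint64 len)
{
  /* Round up to the next multiple of 8: 0 -> 0, 13 -> 16, 16 -> 16 */
  return (len + 7) & ~((Uint64)7);
}
#endif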
6160 
6161 
6162 /**
6163  Get the default value of the field, from the table's default_values row.
6164 */
6165 static void get_default_value(void *def_val, Field *field)
6166 {
6167  DBUG_ASSERT(field != NULL);
6168 
6169  my_ptrdiff_t src_offset= field->table->s->default_values - field->table->record[0];
6170 
6171  {
6172  if (bitmap_is_set(field->table->read_set, field->field_index))
6173  {
6174  if (field->type() == MYSQL_TYPE_BIT)
6175  {
6176  Field_bit *field_bit= static_cast<Field_bit*>(field);
6177  if (!field->is_real_null(src_offset))
6178  {
6179  field->move_field_offset(src_offset);
6180  longlong value= field_bit->val_int();
6181  /* Map to NdbApi format - two Uint32s */
6182  Uint32 out[2];
6183  out[0] = 0;
6184  out[1] = 0;
6185  for (int b=0; b < 64; b++)
6186  {
6187  out[b >> 5] |= (value & 1) << (b & 31);
6188 
6189  value= value >> 1;
6190  }
6191  memcpy(def_val, out, sizeof(longlong));
6192  field->move_field_offset(-src_offset);
6193  }
6194  }
6195  else if (field->flags & BLOB_FLAG)
6196  {
6197  assert(false);
6198  }
6199  else
6200  {
6201  field->move_field_offset(src_offset);
6202  /* Normal field (not blob or bit type). */
6203  if (!field->is_null())
6204  {
6205  /* Only copy actually used bytes of varstrings. */
6206  uint32 actual_length= field_used_length(field);
6207  uchar *src_ptr= field->ptr;
6208  field->set_notnull();
6209  memcpy(def_val, src_ptr, actual_length);
6210 #ifdef HAVE_purify
6211  if (actual_length < field->pack_length())
6212  memset(((char*)def_val) + actual_length, 0,
6213  field->pack_length() - actual_length);
6214 #endif
6215  }
6216  field->move_field_offset(-src_offset);
6217  /* No action needed for a NULL field. */
6218  }
6219  }
6220  }
6221 }
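/*
  Worked example (illustration only, not compiled): the bit loop above maps
  a 64-bit BIT value into the NdbApi format of two Uint32 words.
*/
#if 0
  Uint64 value= 0x0123456789ABCDEFULL;
  Uint32 out[2]= { 0, 0 };
  for (int b= 0; b < 64; b++, value>>= 1)
    out[b >> 5] |= (Uint32)(value & 1) << (b & 31);
  /* Result: out[0] == 0x89ABCDEF (bits 0..31),
             out[1] == 0x01234567 (bits 32..63) */
#endif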
6222 
6223 /*
6224  DBUG_EXECUTE("value", print_results(););
6225 */
6226 
6227 void ha_ndbcluster::print_results()
6228 {
6229  DBUG_ENTER("print_results");
6230 
6231 #ifndef DBUG_OFF
6232 
6233  char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
6234  String type(buf_type, sizeof(buf_type), &my_charset_bin);
6235  String val(buf_val, sizeof(buf_val), &my_charset_bin);
6236  for (uint f= 0; f < table_share->fields; f++)
6237  {
6238  /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
6239  char buf[2000];
6240  Field *field;
6241  void* ptr;
6242  NdbValue value;
6243 
6244  buf[0]= 0;
6245  field= table->field[f];
6246  if (!(value= m_value[f]).ptr)
6247  {
6248  strmov(buf, "not read");
6249  goto print_value;
6250  }
6251 
6252  ptr= field->ptr;
6253 
6254  if (! (field->flags & BLOB_FLAG))
6255  {
6256  if (value.rec->isNULL())
6257  {
6258  strmov(buf, "NULL");
6259  goto print_value;
6260  }
6261  type.length(0);
6262  val.length(0);
6263  field->sql_type(type);
6264  field->val_str(&val);
6265  my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
6266  }
6267  else
6268  {
6269  NdbBlob *ndb_blob= value.blob;
6270  bool isNull= TRUE;
6271  assert(ndb_blob->getState() == NdbBlob::Active);
6272  ndb_blob->getNull(isNull);
6273  if (isNull)
6274  strmov(buf, "NULL");
6275  }
6276 
6277 print_value:
6278  DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
6279  }
6280 #endif
6281  DBUG_VOID_RETURN;
6282 }
6283 
6284 
6285 /*
6286  Set fields in partition functions in read set for underlying handlers
6287 
6288  SYNOPSIS
6289  include_partition_fields_in_used_fields()
6290 
6291  RETURN VALUE
6292  NONE
6293 
6294  DESCRIPTION
6295  Some handlers only read fields as specified by the bitmap for the
6296  read set. For partitioned handlers we always require that the
6297  fields of the partition functions are read such that we can
6298  calculate the partition id to place updated and deleted records.
6299 */
6300 
6301 static void
6302 include_partition_fields_in_used_fields(Field **ptr, MY_BITMAP *read_set)
6303 {
6304  DBUG_ENTER("include_partition_fields_in_used_fields");
6305  do
6306  {
6307  bitmap_set_bit(read_set, (*ptr)->field_index);
6308  } while (*(++ptr));
6309  DBUG_VOID_RETURN;
6310 }
6311 
6312 
6313 int ha_ndbcluster::index_init(uint index, bool sorted)
6314 {
6315  DBUG_ENTER("ha_ndbcluster::index_init");
6316  DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted));
6317  active_index= index;
6318  m_sorted= sorted;
6319  /*
6320  Locks are explicitly released in scan
6321  unless m_lock.type == TL_READ_HIGH_PRIORITY
6322  and there is no subsequent call to unlock_row()
6323  */
6324  m_lock_tuple= FALSE;
6325  if (table_share->primary_key == MAX_KEY &&
6326  m_use_partition_pruning)
6327  include_partition_fields_in_used_fields(
6328  m_part_info->full_part_field_array,
6329  table->read_set);
6330  DBUG_RETURN(0);
6331 }
6332 
6333 
6334 int ha_ndbcluster::index_end()
6335 {
6336  DBUG_ENTER("ha_ndbcluster::index_end");
6337  DBUG_RETURN(close_scan());
6338 }
6339 
6340 /**
6341  Check if any part of the key contains a NULL value.
6342 */
6343 static
6344 int
6345 check_null_in_key(const KEY* key_info, const uchar *key, uint key_len)
6346 {
6347  KEY_PART_INFO *curr_part, *end_part;
6348  const uchar* end_ptr= key + key_len;
6349  curr_part= key_info->key_part;
6350  end_part= curr_part + key_info->user_defined_key_parts;
6351 
6352  for (; curr_part != end_part && key < end_ptr; curr_part++)
6353  {
6354  if (curr_part->null_bit && *key)
6355  return 1;
6356 
6357  key += curr_part->store_length;
6358  }
6359  return 0;
6360 }
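/*
  Illustration (assumed layout, not part of the original source): for a
  nullable key part the packed key buffer starts with one null-indicator
  byte followed by the key data, so the loop above only needs to test the
  first byte of each nullable part.
*/
#if 0
  /* Two nullable INT parts, second one NULL:
     key = [0x00][4 data bytes][0x01][4 unused bytes]
     store_length == 5 for each part; check_null_in_key() returns 1
     as soon as it sees the 0x01 null byte of the second part. */
#endif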
6361 
6362 
6363 int ha_ndbcluster::index_read_idx_map(uchar* buf, uint index,
6364  const uchar* key,
6365  key_part_map keypart_map,
6366  enum ha_rkey_function find_flag)
6367 {
6368  DBUG_ENTER("ha_ndbcluster::index_read_idx_map");
6369  int error= index_init(index, 0);
6370  if (unlikely(error))
6371  DBUG_RETURN(error);
6372 
6373  DBUG_RETURN(index_read_map(buf, key, keypart_map, find_flag));
6374 }
6375 
6376 
6377 int ha_ndbcluster::index_read(uchar *buf,
6378  const uchar *key, uint key_len,
6379  enum ha_rkey_function find_flag)
6380 {
6381  key_range start_key;
6382  bool descending= FALSE;
6383  DBUG_ENTER("ha_ndbcluster::index_read");
6384  DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d",
6385  active_index, key_len, find_flag));
6386 
6387  start_key.key= key;
6388  start_key.length= key_len;
6389  start_key.flag= find_flag;
6390  descending= FALSE;
6391  switch (find_flag) {
6392  case HA_READ_KEY_OR_PREV:
6393  case HA_READ_BEFORE_KEY:
6394  case HA_READ_PREFIX_LAST:
6395  case HA_READ_PREFIX_LAST_OR_PREV:
6396  descending= TRUE;
6397  break;
6398  default:
6399  break;
6400  }
6401  const int error= read_range_first_to_buf(&start_key, 0, descending,
6402  m_sorted, buf);
6403  table->status=error ? STATUS_NOT_FOUND: 0;
6404  DBUG_RETURN(error);
6405 }
6406 
6407 
6408 int ha_ndbcluster::index_next(uchar *buf)
6409 {
6410  DBUG_ENTER("ha_ndbcluster::index_next");
6411  ha_statistic_increment(&SSV::ha_read_next_count);
6412  const int error= next_result(buf);
6413  table->status=error ? STATUS_NOT_FOUND: 0;
6414  DBUG_RETURN(error);
6415 }
6416 
6417 
6418 int ha_ndbcluster::index_prev(uchar *buf)
6419 {
6420  DBUG_ENTER("ha_ndbcluster::index_prev");
6421  ha_statistic_increment(&SSV::ha_read_prev_count);
6422  const int error= next_result(buf);
6423  table->status=error ? STATUS_NOT_FOUND: 0;
6424  DBUG_RETURN(error);
6425 }
6426 
6427 
6428 int ha_ndbcluster::index_first(uchar *buf)
6429 {
6430  DBUG_ENTER("ha_ndbcluster::index_first");
6431  ha_statistic_increment(&SSV::ha_read_first_count);
6432  // Start the ordered index scan and fetch the first row
6433 
6434  // Only HA_READ_ORDER indexes get called by index_first
6435  const int error= ordered_index_scan(0, 0, m_sorted, FALSE, buf, NULL);
6436  table->status=error ? STATUS_NOT_FOUND: 0;
6437  DBUG_RETURN(error);
6438 }
6439 
6440 
6441 int ha_ndbcluster::index_last(uchar *buf)
6442 {
6443  DBUG_ENTER("ha_ndbcluster::index_last");
6444  ha_statistic_increment(&SSV::ha_read_last_count);
6445  const int error= ordered_index_scan(0, 0, m_sorted, TRUE, buf, NULL);
6446  table->status=error ? STATUS_NOT_FOUND: 0;
6447  DBUG_RETURN(error);
6448 }
6449 
6450 int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len)
6451 {
6452  DBUG_ENTER("ha_ndbcluster::index_read_last");
6453  DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
6454 }
6455 
6456 
6457 /**
6458  Read the first row (only) from the table.
6459 
6460  If only a few rows have been deleted, the first row is found via a
6461  table scan; otherwise it is read via the primary key index.
6462 */
6469 int ha_ndbcluster::read_first_row(uchar * buf, uint primary_key)
6470 {
6471  register int error;
6472  DBUG_ENTER("ha_ndbcluster::read_first_row");
6473 
6474  ha_statistic_increment(&SSV::ha_read_first_count);
6475 
6476  /*
6477  If there are very few deleted rows in the table, find the first row by
6478  scanning the table.
6479  TODO remove the test for HA_READ_ORDER
6480  */
6481  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
6482  !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
6483  {
6484  (void) ha_rnd_init(1);
6485  while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
6486  }
6487  else
6488  {
6489  /* Find the first row through the primary key */
6490  (void) ha_index_init(primary_key, 0);
6491  error=index_first(buf);
6492  }
6493  DBUG_RETURN(error);
6494 }
6495 
6496 
6497 int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
6498  const key_range *end_key,
6499  bool desc, bool sorted,
6500  uchar* buf)
6501 {
6502  part_id_range part_spec;
6503  ndb_index_type type= get_index_type(active_index);
6504  const KEY* key_info= table->key_info+active_index;
6505  int error;
6506  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
6507  DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));
6508 
6509  if (unlikely((error= close_scan())))
6510  DBUG_RETURN(error);
6511 
6512  if (m_use_partition_pruning)
6513  {
6514  DBUG_ASSERT(m_pushed_join_operation != PUSHED_ROOT);
6515  get_partition_set(table, buf, active_index, start_key, &part_spec);
6516  DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
6517  part_spec.start_part, part_spec.end_part));
6518  /*
6519  If partition pruning has found no partition in the set,
6520  we can return HA_ERR_END_OF_FILE.
6521  If partition pruning has found exactly one partition in the set,
6522  we can optimize the scan to run towards that partition only.
6523  */
6524  if (part_spec.start_part > part_spec.end_part)
6525  {
6526  DBUG_RETURN(HA_ERR_END_OF_FILE);
6527  }
6528 
6529  if (part_spec.start_part == part_spec.end_part)
6530  {
6531  /*
6532  Only one partition needs to be scanned; if sorted output was
6533  requested we no longer need to enforce it, since the output from
6534  a single ordered partition index is always sorted.
6535  */
6536  sorted= FALSE;
6537  if (unlikely(!get_transaction_part_id(part_spec.start_part, error)))
6538  {
6539  DBUG_RETURN(error);
6540  }
6541  }
6542  }
6543 
6544  switch (type){
6545  case PRIMARY_KEY_ORDERED_INDEX:
6546  case PRIMARY_KEY_INDEX:
6547  if (start_key &&
6548  start_key->length == key_info->key_length &&
6549  start_key->flag == HA_READ_KEY_EXACT)
6550  {
6551  if (!m_thd_ndb->trans)
6552  if (unlikely(!start_transaction_key(active_index,
6553  start_key->key, error)))
6554  DBUG_RETURN(error);
6555  error= pk_read(start_key->key, start_key->length, buf,
6556  (m_use_partition_pruning)? &(part_spec.start_part) : NULL);
6557  DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
6558  }
6559  break;
6560  case UNIQUE_ORDERED_INDEX:
6561  case UNIQUE_INDEX:
6562  if (start_key && start_key->length == key_info->key_length &&
6563  start_key->flag == HA_READ_KEY_EXACT &&
6564  !check_null_in_key(key_info, start_key->key, start_key->length))
6565  {
6566  if (!m_thd_ndb->trans)
6567  if (unlikely(!start_transaction_key(active_index,
6568  start_key->key, error)))
6569  DBUG_RETURN(error);
6570  error= unique_index_read(start_key->key, start_key->length, buf);
6571  DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
6572  }
6573  else if (type == UNIQUE_INDEX)
6574  DBUG_RETURN(full_table_scan(key_info,
6575  start_key,
6576  end_key,
6577  buf));
6578  break;
6579  default:
6580  break;
6581  }
6582  if (!m_use_partition_pruning && !m_thd_ndb->trans)
6583  {
6584  get_partition_set(table, buf, active_index, start_key, &part_spec);
6585  if (part_spec.start_part == part_spec.end_part)
6586  if (unlikely(!start_transaction_part_id(part_spec.start_part, error)))
6587  DBUG_RETURN(error);
6588  }
6589  // Start the ordered index scan and fetch the first row
6590  DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
6591  (m_use_partition_pruning)? &part_spec : NULL));
6592 }
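/*
  Summary (illustration only, not part of the original source) of the
  dispatch performed by read_range_first_to_buf() above:
*/
#if 0
  /* exact match on full primary key           -> pk_read(), single row
     exact match on full unique key, no NULLs  -> unique_index_read()
     partial or non-exact key                  -> ordered_index_scan()
        (or full_table_scan() for a plain unique index) */
#endif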
6593 
6594 int ha_ndbcluster::read_range_first(const key_range *start_key,
6595  const key_range *end_key,
6596  bool eq_r, bool sorted)
6597 {
6598  uchar* buf= table->record[0];
6599  DBUG_ENTER("ha_ndbcluster::read_range_first");
6600  DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
6601  sorted, buf));
6602 }
6603 
6604 int ha_ndbcluster::read_range_next()
6605 {
6606  DBUG_ENTER("ha_ndbcluster::read_range_next");
6607  DBUG_RETURN(next_result(table->record[0]));
6608 }
6609 
6610 
6611 int ha_ndbcluster::rnd_init(bool scan)
6612 {
6613  int error;
6614  DBUG_ENTER("rnd_init");
6615  DBUG_PRINT("enter", ("scan: %d", scan));
6616 
6617  if ((error= close_scan()))
6618  DBUG_RETURN(error);
6619  index_init(table_share->primary_key, 0);
6620  DBUG_RETURN(0);
6621 }
6622 
6623 int ha_ndbcluster::close_scan()
6624 {
6625  /*
6626  workaround for bug #39872 - explain causes segv
6627  - rnd_end/close_scan is called on unlocked table
6628  - should be fixed in server code, but this will
6629  not be done until 6.0 as it is too intrusive
6630  */
6631  if (m_thd_ndb == NULL)
6632  return 0;
6633  NdbTransaction *trans= m_thd_ndb->trans;
6634  int error;
6635  DBUG_ENTER("close_scan");
6636 
6637  if (m_active_query)
6638  {
6639  m_active_query->close(m_thd_ndb->m_force_send);
6640  m_active_query= NULL;
6641  }
6642 
6643  NdbScanOperation *cursor= m_active_cursor;
6644 
6645  if (!cursor)
6646  {
6647  cursor = m_multi_cursor;
6648  if (!cursor)
6649  DBUG_RETURN(0);
6650  }
6651 
6652  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
6653  DBUG_RETURN(error);
6654 
6655  if (m_thd_ndb->m_unsent_bytes)
6656  {
6657  /*
6658  Take over any pending operations to the
6659  deleting/updating transaction before closing the scan
6660  */
6661  DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
6662  (long) m_thd_ndb->m_unsent_bytes));
6663  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
6664  {
6665  no_uncommitted_rows_execute_failure();
6666  DBUG_RETURN(ndb_err(trans));
6667  }
6668  }
6669 
6670  cursor->close(m_thd_ndb->m_force_send, TRUE);
6671  m_active_cursor= NULL;
6672  m_multi_cursor= NULL;
6673  DBUG_RETURN(0);
6674 }
6675 
6676 int ha_ndbcluster::rnd_end()
6677 {
6678  DBUG_ENTER("rnd_end");
6679  DBUG_RETURN(close_scan());
6680 }
6681 
6682 
6683 int ha_ndbcluster::rnd_next(uchar *buf)
6684 {
6685  DBUG_ENTER("rnd_next");
6686  ha_statistic_increment(&SSV::ha_read_rnd_next_count);
6687 
6688  int error;
6689  if (m_active_cursor)
6690  error= next_result(buf);
6691  else if (m_active_query)
6692  error= next_result(buf);
6693  else
6694  error= full_table_scan(NULL, NULL, NULL, buf);
6695 
6696  table->status= error ? STATUS_NOT_FOUND: 0;
6697  DBUG_RETURN(error);
6698 }
6699 
6700 
6701 /**
6702  Fetch a single row by the position saved by a previous call to
6703  position(); for NDB, 'pos' holds the primary key (or the hidden
6704  key) of the row to read.
6705 */
6706 
6707 int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos)
6708 {
6709  DBUG_ENTER("rnd_pos");
6710  ha_statistic_increment(&SSV::ha_read_rnd_count);
6711  // The primary key for the record is stored in pos
6712  // Perform a pk_read using primary key "index"
6713  {
6714  part_id_range part_spec;
6715  uint key_length= ref_length;
6716  if (m_user_defined_partitioning)
6717  {
6718  if (table_share->primary_key == MAX_KEY)
6719  {
6720  /*
6721  The partition id has been fetched from ndb
6722  and has been stored directly after the hidden key
6723  */
6724  DBUG_DUMP("key+part", pos, key_length);
6725  key_length= ref_length - sizeof(m_part_id);
6726  part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
6727  }
6728  else
6729  {
6730  key_range key_spec;
6731  KEY *key_info= table->key_info + table_share->primary_key;
6732  key_spec.key= pos;
6733  key_spec.length= key_length;
6734  key_spec.flag= HA_READ_KEY_EXACT;
6735  get_full_part_id_from_key(table, buf, key_info,
6736  &key_spec, &part_spec);
6737  DBUG_ASSERT(part_spec.start_part == part_spec.end_part);
6738  }
6739  DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
6740  }
6741  DBUG_DUMP("key", pos, key_length);
6742  int res= pk_read(pos, key_length, buf,
6743  (m_user_defined_partitioning) ?
6744  &(part_spec.start_part)
6745  : NULL);
6746  if (res == HA_ERR_KEY_NOT_FOUND)
6747  {
6748  /*
6749  When using rnd_pos the server first retrieves a set of records
6750  (typically by scanning them) and saves a unique identifier per
6751  row (for NDB this is the primary key). It later re-reads the
6752  record with rnd_pos and the saved key. Since NDB only supports
6753  committed read, the row may have been deleted in between, so
6754  return HA_ERR_RECORD_DELETED rather than HA_ERR_KEY_NOT_FOUND
6755  (which would abort the statement).
6756  */
6760  res= HA_ERR_RECORD_DELETED;
6761  }
6762  table->status= res ? STATUS_NOT_FOUND: 0;
6763  DBUG_RETURN(res);
6764  }
6765 }
6766 
6767 
6768 /**
6769  Store the primary key of this record in the "ref" variable,
6770  so that the row can be retrieved again later using "ref"
6771  as the key.
6772 */
6773 
6774 void ha_ndbcluster::position(const uchar *record)
6775 {
6776  KEY *key_info;
6777  KEY_PART_INFO *key_part;
6778  KEY_PART_INFO *end;
6779  uchar *buff;
6780  uint key_length;
6781 
6782  DBUG_ENTER("position");
6783 
6784  if (table_share->primary_key != MAX_KEY)
6785  {
6786  key_length= ref_length;
6787  key_info= table->key_info + table_share->primary_key;
6788  key_part= key_info->key_part;
6789  end= key_part + key_info->user_defined_key_parts;
6790  buff= ref;
6791 
6792  for (; key_part != end; key_part++)
6793  {
6794  if (key_part->null_bit) {
6795  /* Store 0 if the key part is a NULL part */
6796  if (record[key_part->null_offset]
6797  & key_part->null_bit) {
6798  *buff++= 1;
6799  continue;
6800  }
6801  *buff++= 0;
6802  }
6803 
6804  size_t len = key_part->length;
6805  const uchar * ptr = record + key_part->offset;
6806  Field *field = key_part->field;
6807  if (field->type() == MYSQL_TYPE_VARCHAR)
6808  {
6809  if (((Field_varstring*)field)->length_bytes == 1)
6810  {
6811  /*
6812  Keys always use 2 length bytes, so expand a 1-byte length here
6813  */
6814  buff[0] = ptr[0];
6815  buff[1] = 0;
6816  memcpy(buff+2, ptr + 1, len);
6817  }
6818  else
6819  {
6820  memcpy(buff, ptr, len + 2);
6821  }
6822  len += 2;
6823  }
6824  else
6825  {
6826  memcpy(buff, ptr, len);
6827  }
6828  buff += len;
6829  }
6830  }
6831  else
6832  {
6833  // No primary key, get hidden key
6834  DBUG_PRINT("info", ("Getting hidden key"));
6835  // If table has user defined partition save the partition id as well
6836  if (m_user_defined_partitioning)
6837  {
6838  DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
6839  key_length= ref_length - sizeof(m_part_id);
6840  memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
6841  }
6842  else
6843  key_length= ref_length;
6844 #ifndef DBUG_OFF
6845  int hidden_no= table->s->fields;
6846  const NDBTAB *tab= m_table;
6847  const NDBCOL *hidden_col= tab->getColumn(hidden_no);
6848  DBUG_ASSERT(hidden_col->getPrimaryKey() &&
6849  hidden_col->getAutoIncrement() &&
6850  key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
6851 #endif
6852  memcpy(ref, &m_ref, key_length);
6853  }
6854 #ifndef DBUG_OFF
6855  if (table_share->primary_key == MAX_KEY && m_user_defined_partitioning)
6856  DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id));
6857 #endif
6858  DBUG_DUMP("ref", ref, key_length);
6859  DBUG_VOID_RETURN;
6860 }
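/*
  Sketch (hypothetical driver code, illustration only, not compiled): how
  the server pairs position() and rnd_pos(). position() saves the primary
  key (or hidden key) into h->ref; rnd_pos() later re-fetches the row.
*/
#if 0
  h->rnd_next(table->record[0]);           // read some row
  h->position(table->record[0]);           // save its key into h->ref
  /* ... other rows are read, sorted, etc ... */
  h->rnd_pos(table->record[0], h->ref);    // re-fetch the saved row
#endif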
6861 
6862 int
6863 ha_ndbcluster::cmp_ref(const uchar * ref1, const uchar * ref2)
6864 {
6865  DBUG_ENTER("cmp_ref");
6866 
6867  if (table_share->primary_key != MAX_KEY)
6868  {
6869  KEY *key_info= table->key_info + table_share->primary_key;
6870  KEY_PART_INFO *key_part= key_info->key_part;
6871  KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
6872 
6873  for (; key_part != end; key_part++)
6874  {
6875  // NOTE: No need to check for null since PK is not-null
6876 
6877  Field *field= key_part->field;
6878  int result= field->key_cmp(ref1, ref2);
6879  if (result)
6880  {
6881  DBUG_RETURN(result);
6882  }
6883 
6884  if (field->type() == MYSQL_TYPE_VARCHAR)
6885  {
6886  ref1+= 2;
6887  ref2+= 2;
6888  }
6889 
6890  ref1+= key_part->length;
6891  ref2+= key_part->length;
6892  }
6893  DBUG_RETURN(0);
6894  }
6895  else
6896  {
6897  DBUG_RETURN(memcmp(ref1, ref2, ref_length));
6898  }
6899 }
6900 
6901 int ha_ndbcluster::info(uint flag)
6902 {
6903  THD *thd= table->in_use;
6904  int result= 0;
6905  DBUG_ENTER("info");
6906  DBUG_PRINT("enter", ("flag: %d", flag));
6907 
6908  if (flag & HA_STATUS_POS)
6909  DBUG_PRINT("info", ("HA_STATUS_POS"));
6910  if (flag & HA_STATUS_TIME)
6911  DBUG_PRINT("info", ("HA_STATUS_TIME"));
6912  while (flag & HA_STATUS_VARIABLE)
6913  {
6914  if (!thd)
6915  thd= current_thd;
6916  DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
6917 
6918  if (!m_table_info)
6919  {
6920  if ((my_errno= check_ndb_connection(thd)))
6921  DBUG_RETURN(my_errno);
6922  }
6923 
6924  /*
6925  May need to update local copy of statistics in
6926  'm_table_info', either directly from datanodes,
6927  or from shared (mutex protected) cached copy, if:
6928  1) 'use_exact_count' has been set (by config or user).
6929  2) HA_STATUS_NO_LOCK -> read from shared cached copy.
6930  3) Local copy is invalid.
6931  */
6932  bool exact_count= THDVAR(thd, use_exact_count);
6933  if (exact_count || // 1)
6934  !(flag & HA_STATUS_NO_LOCK) || // 2)
6935  m_table_info == NULL || // 3)
6936  m_table_info->records == ~(ha_rows)0) // 3)
6937  {
6938  result= update_stats(thd, (exact_count || !(flag & HA_STATUS_NO_LOCK)));
6939  if (result)
6940  DBUG_RETURN(result);
6941  }
6942  /* Read from local statistics, fast and fuzzy, wo/ locks */
6943  else
6944  {
6945  DBUG_ASSERT(m_table_info->records != ~(ha_rows)0);
6946  stats.records= m_table_info->records +
6947  m_table_info->no_uncommitted_rows_count;
6948  }
6949 
6950  if (thd->lex->sql_command != SQLCOM_SHOW_TABLE_STATUS &&
6951  thd->lex->sql_command != SQLCOM_SHOW_KEYS)
6952  {
6953  /*
6954  just use whatever stats we have. However, the
6955  optimizer interprets the values 0 and 1 as EXACT,
6956  so values < 2 should not be returned.
6957  */
6958  if (stats.records < 2)
6959  stats.records= 2;
6960  }
6961  break;
6962  }
6963  /* RPK moved to variable part */
6964  if (flag & HA_STATUS_VARIABLE)
6965  {
6966  /* No meaningful way to return error */
6967  DBUG_PRINT("info", ("rec_per_key"));
6968  set_rec_per_key();
6969  }
6970  if (flag & HA_STATUS_ERRKEY)
6971  {
6972  DBUG_PRINT("info", ("HA_STATUS_ERRKEY"));
6973  errkey= m_dupkey;
6974  }
6975  if (flag & HA_STATUS_AUTO)
6976  {
6977  DBUG_PRINT("info", ("HA_STATUS_AUTO"));
6978  if (m_table && table->found_next_number_field)
6979  {
6980  if (!thd)
6981  thd= current_thd;
6982  if ((my_errno= check_ndb_connection(thd)))
6983  DBUG_RETURN(my_errno);
6984  Ndb *ndb= get_ndb(thd);
6985  Ndb_tuple_id_range_guard g(m_share);
6986 
6987  Uint64 auto_increment_value64;
6988  if (ndb->readAutoIncrementValue(m_table, g.range,
6989  auto_increment_value64) == -1)
6990  {
6991  const NdbError err= ndb->getNdbError();
6992  sql_print_error("Error %lu in readAutoIncrementValue(): %s",
6993  (ulong) err.code, err.message);
6994  stats.auto_increment_value= ~(ulonglong)0;
6995  }
6996  else
6997  stats.auto_increment_value= (ulonglong)auto_increment_value64;
6998  }
6999  }
7000 
7001  if(result == -1)
7002  result= HA_ERR_NO_CONNECTION;
7003 
7004  DBUG_RETURN(result);
7005 }
7006 
7007 
7008 void ha_ndbcluster::get_dynamic_partition_info(PARTITION_STATS *stat_info,
7009  uint part_id)
7010 {
7011  DBUG_PRINT("info", ("ha_ndbcluster::get_dynamic_partition_info"));
7012 
7013  memset(stat_info, 0, sizeof(PARTITION_STATS));
7014  int error = 0;
7015  THD *thd = table->in_use;
7016 
7017  if (!thd)
7018  thd = current_thd;
7019  if (!m_table_info)
7020  {
7021  if ((error = check_ndb_connection(thd)))
7022  goto err;
7023  }
7024  error = update_stats(thd, 1, false, part_id);
7025 
7026  if (error == 0)
7027  {
7028  stat_info->records = stats.records;
7029  stat_info->mean_rec_length = stats.mean_rec_length;
7030  stat_info->data_file_length = stats.data_file_length;
7031  stat_info->delete_length = stats.delete_length;
7032  stat_info->max_data_file_length = stats.max_data_file_length;
7033  return;
7034  }
7035 
7036 err:
7037 
7038  DBUG_PRINT("warning",
7039  ("ha_ndbcluster::get_dynamic_partition_info failed with error code %u",
7040  error));
7041 }
7042 
7043 
7044 int ha_ndbcluster::extra(enum ha_extra_function operation)
7045 {
7046  DBUG_ENTER("extra");
7047  switch (operation) {
7048  case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/
7049  DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
7050  DBUG_PRINT("info", ("Ignoring duplicate key"));
7051  m_ignore_dup_key= TRUE;
7052  break;
7053  case HA_EXTRA_NO_IGNORE_DUP_KEY:
7054  DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
7055  m_ignore_dup_key= FALSE;
7056  break;
7057  case HA_EXTRA_IGNORE_NO_KEY:
7058  DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
7059  DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
7060  m_ignore_no_key= TRUE;
7061  break;
7062  case HA_EXTRA_NO_IGNORE_NO_KEY:
7063  DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
7064  DBUG_PRINT("info", ("Turning off AO_IgnoreError at Commit/NoCommit"));
7065  m_ignore_no_key= FALSE;
7066  break;
7067  case HA_EXTRA_WRITE_CAN_REPLACE:
7068  DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
7069  if (!m_has_unique_index ||
7070  current_thd->slave_thread || /* always set if slave, quick fix for bug 27378 */
7071  isManualBinlogExec(current_thd)) /* or if manual binlog application, for bug 46662 */
7072  {
7073  DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
7074  m_use_write= TRUE;
7075  }
7076  break;
7077  case HA_EXTRA_WRITE_CANNOT_REPLACE:
7078  DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
7079  DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
7080  m_use_write= FALSE;
7081  break;
7082  case HA_EXTRA_DELETE_CANNOT_BATCH:
7083  DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH"));
7084  m_delete_cannot_batch= TRUE;
7085  break;
7086  case HA_EXTRA_UPDATE_CANNOT_BATCH:
7087  DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH"));
7088  m_update_cannot_batch= TRUE;
7089  break;
7090  // We don't implement 'KEYREAD'. However, KEYREAD also implies DISABLE_JOINPUSH.
7091  case HA_EXTRA_KEYREAD:
7092  DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
7093  m_disable_pushed_join= TRUE;
7094  break;
7095  case HA_EXTRA_NO_KEYREAD:
7096  DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
7097  m_disable_pushed_join= FALSE;
7098  break;
7099  default:
7100  break;
7101  }
7102 
7103  DBUG_RETURN(0);
7104 }
7105 
7106 
7107 bool ha_ndbcluster::start_read_removal(void)
7108 {
7109  THD *thd= table->in_use;
7110  DBUG_ENTER("start_read_removal");
7111 
7112  if (uses_blob_value(table->write_set))
7113  {
7114  DBUG_PRINT("exit", ("No! Blob field in write_set"));
7115  DBUG_RETURN(false);
7116  }
7117 
7118  if (thd->lex->sql_command == SQLCOM_DELETE &&
7119  table_share->blob_fields)
7120  {
7121  DBUG_PRINT("exit", ("No! DELETE from table with blob(s)"));
7122  DBUG_RETURN(false);
7123  }
7124 
7125  if (table_share->primary_key == MAX_KEY)
7126  {
7127  DBUG_PRINT("exit", ("No! Table with hidden key"));
7128  DBUG_RETURN(false);
7129  }
7130 
7131  if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
7132  {
7133  DBUG_PRINT("exit", ("No! Updating primary key"));
7134  DBUG_RETURN(false);
7135  }
7136 
7137  if (m_has_unique_index)
7138  {
7139  for (uint i= 0; i < table_share->keys; i++)
7140  {
7141  const KEY* key= table->key_info + i;
7142  if ((key->flags & HA_NOSAME) &&
7143  bitmap_is_overlapping(table->write_set,
7144  m_key_fields[i]))
7145  {
7146  DBUG_PRINT("exit", ("No! Unique key %d is updated", i));
7147  DBUG_RETURN(false);
7148  }
7149  }
7150  }
7151  m_read_before_write_removal_possible= TRUE;
7152  DBUG_PRINT("exit", ("Yes, rbwr is possible!"));
7153  DBUG_RETURN(true);
7154 }
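/*
  Examples (illustration only, assuming a table t with primary key pk,
  unique key uk and a plain column a) of when read-before-write removal
  is possible according to the checks above:
*/
#if 0
  /* UPDATE t SET a = a + 1 WHERE pk = 7;   -- yes: no key column updated
     UPDATE t SET pk = pk + 1 WHERE a = 0;  -- no: primary key updated
     UPDATE t SET uk = 5 WHERE pk = 7;      -- no: unique key updated
     DELETE FROM t WHERE pk = 7;            -- yes, unless t has blobs */
#endif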
7155 
7156 
7157 ha_rows ha_ndbcluster::end_read_removal(void)
7158 {
7159  DBUG_ENTER("end_read_removal");
7160  DBUG_ASSERT(m_read_before_write_removal_possible);
7161  DBUG_PRINT("info", ("updated: %llu, deleted: %llu",
7162  m_rows_updated, m_rows_deleted));
7163  DBUG_RETURN(m_rows_updated + m_rows_deleted);
7164 }
7165 
7166 
7167 int ha_ndbcluster::reset()
7168 {
7169  DBUG_ENTER("ha_ndbcluster::reset");
7170  if (m_cond)
7171  {
7172  m_cond->cond_clear();
7173  }
7174 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
7175  DBUG_ASSERT(m_active_query == NULL);
7176  if (m_pushed_join_operation==PUSHED_ROOT) // Root of pushed query
7177  {
7178  delete m_pushed_join_member; // Also delete QueryDef
7179  }
7180  m_pushed_join_member= NULL;
7181  m_pushed_join_operation= -1;
7182  m_disable_pushed_join= FALSE;
7183 #endif
7184 
7185 #if 0
7186  // Magnus, disable this "hack" until it's possible to test if
7187  // it's still needed
7188  /*
7189  Regular partition pruning will set the bitmap appropriately.
7190  Some queries like ALTER TABLE doesn't use partition pruning and
7191  thus the 'used_partitions' bitmap needs to be initialized
7192  */
7193  if (m_part_info)
7194  bitmap_set_all(&m_part_info->used_partitions);
7195 #endif
7196 
7197  /* reset flags set by extra calls */
7198  m_read_before_write_removal_possible= FALSE;
7199  m_read_before_write_removal_used= FALSE;
7200  m_rows_updated= m_rows_deleted= 0;
7201  m_ignore_dup_key= FALSE;
7202  m_use_write= FALSE;
7203  m_ignore_no_key= FALSE;
7204  m_rows_inserted= (ha_rows) 0;
7205  m_rows_to_insert= (ha_rows) 1;
7206  m_delete_cannot_batch= FALSE;
7207  m_update_cannot_batch= FALSE;
7208 
7209  assert(m_is_bulk_delete == false);
7210  m_is_bulk_delete = false;
7211  DBUG_RETURN(0);
7212 }
7213 
7214 
7215 /**
7216  Send the batched inserts to NDB: either as part of the open
7217  transaction (execute_no_commit) or, when transactions have been
7218  switched off, as a commit followed by a transaction restart.
7219 */
7224 int
7225 ha_ndbcluster::flush_bulk_insert(bool allow_batch)
7226 {
7227  NdbTransaction *trans= m_thd_ndb->trans;
7228  DBUG_ENTER("ha_ndbcluster::flush_bulk_insert");
7229  DBUG_PRINT("info", ("Sending inserts to NDB, rows_inserted: %d",
7230  (int)m_rows_inserted));
7231  DBUG_ASSERT(trans);
7232 
7233 
7234  if (! (m_thd_ndb->trans_options & TNTO_TRANSACTIONS_OFF))
7235  {
7236  if (!allow_batch &&
7237  execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
7238  {
7239  no_uncommitted_rows_execute_failure();
7240  DBUG_RETURN(ndb_err(trans));
7241  }
7242  }
7243  else
7244  {
7245  /*
7246  signal that transaction has been broken up and hence cannot
7247  be rolled back
7248  */
7249  THD *thd= table->in_use;
7250  thd->transaction.all.mark_modified_non_trans_table();
7251  thd->transaction.stmt.mark_modified_non_trans_table();
7252  if (execute_commit(thd, m_thd_ndb, trans, m_thd_ndb->m_force_send,
7253  m_ignore_no_key) != 0)
7254  {
7255  no_uncommitted_rows_execute_failure();
7256  DBUG_RETURN(ndb_err(trans));
7257  }
7258  if (trans->restart() != 0)
7259  {
7260  DBUG_ASSERT(0);
7261  DBUG_RETURN(-1);
7262  }
7263  }
7264  DBUG_RETURN(0);
7265 }
7266 
7267 void ha_ndbcluster::start_bulk_insert(ha_rows rows)
7268 {
7269  DBUG_ENTER("start_bulk_insert");
7270  DBUG_PRINT("enter", ("rows: %d", (int)rows));
7271 
7272  m_rows_inserted= (ha_rows) 0;
7273  if (!m_use_write && m_ignore_dup_key)
7274  {
7275  /*
7276  compare this if-expression with the one in write_row:
7277  we have a situation where peek_indexed_rows() will be called,
7278  so we cannot batch
7279  */
7280  DBUG_PRINT("info", ("Batching turned off as duplicate key is "
7281  "ignored by using peek_row"));
7282  m_rows_to_insert= 1;
7283  DBUG_VOID_RETURN;
7284  }
7285  if (rows == (ha_rows) 0)
7286  {
7287  /* We don't know how many will be inserted, guess */
7288  m_rows_to_insert=
7289  (m_autoincrement_prefetch > DEFAULT_AUTO_PREFETCH)
7290  ? m_autoincrement_prefetch
7291  : DEFAULT_AUTO_PREFETCH;
7292  m_autoincrement_prefetch= m_rows_to_insert;
7293  }
7294  else
7295  {
7296  m_rows_to_insert= rows;
7297  if (m_autoincrement_prefetch < m_rows_to_insert)
7298  m_autoincrement_prefetch= m_rows_to_insert;
7299  }
7300 
7301  DBUG_VOID_RETURN;
7302 }
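/*
  Worked example (illustration only) of the sizing logic above:
*/
#if 0
  /* rows == 0 (unknown): m_rows_to_insert = max(m_autoincrement_prefetch,
                          DEFAULT_AUTO_PREFETCH), i.e. at least 32
     rows == 1000:        m_rows_to_insert = 1000, and
                          m_autoincrement_prefetch raised to 1000 if smaller */
#endif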
7303 
7304 /**
7305  End of a bulk insert; flush any remaining batched inserts.
7306 */
7307 int ha_ndbcluster::end_bulk_insert()
7308 {
7309  int error= 0;
7310 
7311  DBUG_ENTER("end_bulk_insert");
7312  // Check if last inserts need to be flushed
7313 
7314  THD *thd= table->in_use;
7315  Thd_ndb *thd_ndb= m_thd_ndb;
7316 
7317  if (!thd_allow_batch(thd) && thd_ndb->m_unsent_bytes)
7318  {
7319  bool allow_batch= (thd_ndb->m_handler != 0);
7320  error= flush_bulk_insert(allow_batch);
7321  if (error != 0)
7322  my_errno= error;
7323  }
7324 
7325  m_rows_inserted= (ha_rows) 0;
7326  m_rows_to_insert= (ha_rows) 1;
7327  DBUG_RETURN(error);
7328 }
7329 
7330 
7331 int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
7332 {
7333  DBUG_ENTER("extra_opt");
7334  DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
7335  DBUG_RETURN(extra(operation));
7336 }
7337 
7338 static const char *ha_ndbcluster_exts[] = {
7339  ha_ndb_ext,
7340  NullS
7341 };
7342 
7343 const char** ha_ndbcluster::bas_ext() const
7344 {
7345  return ha_ndbcluster_exts;
7346 }
7347 
7348 /**
7349  Estimate the cost of reading through the whole table; comparable
7350  to records_in_range() so the optimizer can choose between a table
7351  scan and index access.
7352 */
7355 double ha_ndbcluster::scan_time()
7356 {
7357  DBUG_ENTER("ha_ndbcluster::scan_time()");
7358  double res= rows2double(stats.records*1000);
7359  DBUG_PRINT("exit", ("table: %s value: %f",
7360  m_tabname, res));
7361  DBUG_RETURN(res);
7362 }
7363 
7364 /*
7365  Convert MySQL table locks into locks supported by Ndb Cluster.
7366  Note that MySQL Cluster does not currently support distributed
7367  table locks, so to be safe one should set the cluster to Single
7368  User Mode before relying on table locks when updating tables
7369  from several MySQL servers
7370 */
7371 
7372 THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
7373  THR_LOCK_DATA **to,
7374  enum thr_lock_type lock_type)
7375 {
7376  DBUG_ENTER("store_lock");
7377  if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK)
7378  {
7379 
7380  /* If we are not doing a LOCK TABLE, then allow multiple
7381  writers */
7382 
7383  /* Since NDB does not currently have table locks
7384  this is treated as an ordinary lock */
7385 
7386  const bool in_lock_tables = thd_in_lock_tables(thd);
7387  const uint sql_command = thd_sql_command(thd);
7388  if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
7389  lock_type <= TL_WRITE) &&
7390  !(in_lock_tables && sql_command == SQLCOM_LOCK_TABLES))
7391  lock_type= TL_WRITE_ALLOW_WRITE;
7392 
7393  /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
7394  MySQL would use the lock TL_READ_NO_INSERT on t2, and that
7395  would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
7396  to t2. Convert the lock to a normal read lock to allow
7397  concurrent inserts to t2. */
7398 
7399  if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
7400  lock_type= TL_READ;
7401 
7402  /*
7403  ALTER TABLE requires an exclusive write lock on the table
7404  */
7409  if (sql_command == SQLCOM_ALTER_TABLE)
7410  lock_type = TL_WRITE;
7411 
7412  m_lock.type=lock_type;
7413  }
7414  *to++= &m_lock;
7415 
7416  DBUG_PRINT("exit", ("lock_type: %d", lock_type));
7417 
7418  DBUG_RETURN(to);
7419 }
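/*
  Summary (illustration only, not part of the original source) of the
  conversions performed above when not running under LOCK TABLES:
*/
#if 0
  /* TL_WRITE_CONCURRENT_INSERT .. TL_WRITE -> TL_WRITE_ALLOW_WRITE
       (allow multiple writers; NDB does row-level locking itself)
     TL_READ_NO_INSERT                      -> TL_READ
       (allow concurrent inserts to the source table of INSERT..SELECT)
     any lock during ALTER TABLE            -> TL_WRITE */
#endif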
7420 
7421 /*
7422  As MySQL will execute an external lock for every new table it uses,
7423  we can use this to start transactions.
7424  If we are in auto_commit mode we just need to start a transaction
7425  for the statement; this will be stored in thd_ndb.stmt.
7426  If not, we have to start a master transaction if one doesn't
7427  already exist; this will be stored in thd_ndb.all
7428 
7429  When a table lock is held, one transaction will be started which holds
7430  the table lock, and for each statement a new ("hupp") transaction is started.
7431  If we are locking the table then:
7432  - save the NdbDictionary::Table for easy access
7433  - save reference to table statistics
7434  - refresh list of the indexes for the table if needed (if altered)
7435  */
7436 
7437 #ifdef HAVE_NDB_BINLOG
7438 static int ndbcluster_update_apply_status(THD *thd, int do_update)
7439 {
7440  Thd_ndb *thd_ndb= get_thd_ndb(thd);
7441  Ndb *ndb= thd_ndb->ndb;
7442  NDBDICT *dict= ndb->getDictionary();
7443  const NDBTAB *ndbtab;
7444  NdbTransaction *trans= thd_ndb->trans;
7445  ndb->setDatabaseName(NDB_REP_DB);
7446  Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
7447  if (!(ndbtab= ndbtab_g.get_table()))
7448  {
7449  return -1;
7450  }
7451  NdbOperation *op= 0;
7452  int r= 0;
7453  r|= (op= trans->getNdbOperation(ndbtab)) == 0;
7454  DBUG_ASSERT(r == 0);
7455  if (do_update)
7456  r|= op->updateTuple();
7457  else
7458  r|= op->writeTuple();
7459  DBUG_ASSERT(r == 0);
7460  // server_id
7461  r|= op->equal(0u, (Uint32)thd->server_id);
7462  DBUG_ASSERT(r == 0);
7463  if (!do_update)
7464  {
7465  // epoch
7466  r|= op->setValue(1u, (Uint64)0);
7467  DBUG_ASSERT(r == 0);
7468  }
7469  const char* group_master_log_name =
7470  ndb_mi_get_group_master_log_name();
7471  const Uint64 group_master_log_pos =
7472  ndb_mi_get_group_master_log_pos();
7473  const Uint64 future_event_relay_log_pos =
7474  ndb_mi_get_future_event_relay_log_pos();
7475  const Uint64 group_relay_log_pos =
7476  ndb_mi_get_group_relay_log_pos();
7477 
7478  // log_name
7479  char tmp_buf[FN_REFLEN];
7480  ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,
7481  group_master_log_name, strlen(group_master_log_name));
7482  r|= op->setValue(2u, tmp_buf);
7483  DBUG_ASSERT(r == 0);
7484  // start_pos
7485  r|= op->setValue(3u, group_master_log_pos);
7486  DBUG_ASSERT(r == 0);
7487  // end_pos
7488  r|= op->setValue(4u, group_master_log_pos +
7489  (future_event_relay_log_pos - group_relay_log_pos));
7490  DBUG_ASSERT(r == 0);
7491  return 0;
7492 }
7493 #endif /* HAVE_NDB_BINLOG */
7494 
7495 static void transaction_checks(THD *thd, Thd_ndb *thd_ndb)
7496 {
7497  if (thd->lex->sql_command == SQLCOM_LOAD)
7498  thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
7499  else if (!thd->transaction.flags.enabled)
7500  thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
7501  else if (!THDVAR(thd, use_transactions))
7502  thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
7503  thd_ndb->m_force_send= THDVAR(thd, force_send);
7504  if (!thd->slave_thread)
7505  thd_ndb->m_batch_size= THDVAR(thd, batch_size);
7506  else
7507  {
7508  thd_ndb->m_batch_size= THDVAR(NULL, batch_size); /* using global value */
7509  /* Do not use hinted TC selection in slave thread */
7510  THDVAR(thd, optimized_node_selection)=
7511  THDVAR(NULL, optimized_node_selection) & 1; /* using global value */
7512  }
7513 }
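/*
  Examples (illustration only) of what the checks above result in:
*/
#if 0
  /* LOAD DATA (SQLCOM_LOAD)           -> TNTO_TRANSACTIONS_OFF
     SET ndb_use_transactions = OFF    -> TNTO_TRANSACTIONS_OFF
     slave SQL thread                  -> global batch_size and
                                          unhinted TC selection */
#endif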
7514 
7515 int ha_ndbcluster::start_statement(THD *thd,
7516  Thd_ndb *thd_ndb,
7517  uint table_count)
7518 {
7519  NdbTransaction *trans= thd_ndb->trans;
7520  int error;
7521  DBUG_ENTER("ha_ndbcluster::start_statement");
7522 
7523  m_thd_ndb= thd_ndb;
7524  transaction_checks(thd, m_thd_ndb);
7525 
7526  if (table_count == 0)
7527  {
7528  trans_register_ha(thd, FALSE, ndbcluster_hton);
7529  if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
7530  {
7531  if (!trans)
7532  trans_register_ha(thd, TRUE, ndbcluster_hton);
7533  thd_ndb->m_handler= NULL;
7534  }
7535  else
7536  {
7537  /*
7538  this is an autocommit; we may keep a reference to the
7539  handler to be used in the commit phase for optimization
7540  reasons, deferring execute
7541  */
7542  thd_ndb->m_handler= this;
7543  }
7544  }
7545  else
7546  {
7547  /*
7548  there is more than one handler involved, so deferral of
7549  execute is not possible
7550  */
7551  ha_ndbcluster* handler = thd_ndb->m_handler;
7552  thd_ndb->m_handler= NULL;
7553  if (handler != NULL)
7554  {
7555  /*
7556  The handler that previously deferred its execute must now be
7557  added to the list of open tables, since deferral is no longer
7558  possible
7559  */
7561  add_handler_to_open_tables(thd, thd_ndb, handler);
7562  }
7563  }
7564  if (!trans && table_count == 0)
7565  {
7566  DBUG_ASSERT(thd_ndb->changed_tables.is_empty() == TRUE);
7567  thd_ndb->trans_options= 0;
7568 
7569  DBUG_PRINT("trans",("Possibly starting transaction"));
7570  const uint opti_node_select = THDVAR(thd, optimized_node_selection);
7571  DBUG_PRINT("enter", ("optimized_node_selection: %u", opti_node_select));
7572  if (!(opti_node_select & 2) ||
7573  thd->lex->sql_command == SQLCOM_LOAD)
7574  if (unlikely(!start_transaction(error)))
7575  DBUG_RETURN(error);
7576 
7577  thd_ndb->init_open_tables();
7578  thd_ndb->m_slow_path= FALSE;
7579  if (!(thd_options(thd) & OPTION_BIN_LOG) ||
7580  thd->variables.binlog_format == BINLOG_FORMAT_STMT)
7581  {
7582  thd_ndb->trans_options|= TNTO_NO_LOGGING;
7583  thd_ndb->m_slow_path= TRUE;
7584  }
7585  else if (thd->slave_thread)
7586  thd_ndb->m_slow_path= TRUE;
7587  }
7588  /*
7589  If this is the start of a LOCK TABLES statement, a table lock
7590  should be taken on the table in NDB
7591 
7592  Check if it should be a read or write lock
7593  */
7594  if (thd_options(thd) & (OPTION_TABLE_LOCK))
7595  {
7596  /* This is currently dead code, awaiting implementation in NDB */
7597  /* lockThisTable(); */
7598  DBUG_PRINT("info", ("Locking the table..." ));
7599 #ifdef NOT_YET
7600  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
7601  ER_GET_ERRMSG, ER(ER_GET_ERRMSG), 0,
7602  "Table only locked locally in this mysqld", "NDB");
7603 #endif
7604  }
7605  DBUG_RETURN(0);
7606 }
7607 
7608 int
7609 ha_ndbcluster::add_handler_to_open_tables(THD *thd,
7610  Thd_ndb *thd_ndb,
7611  ha_ndbcluster* handler)
7612 {
7613  DBUG_ENTER("ha_ndbcluster::add_handler_to_open_tables");
7614  DBUG_PRINT("info", ("Adding %s", handler->m_share->key));
7615 
7616  /*
7617  open_tables is only used when more than one handler is involved
7618  */
7619  DBUG_ASSERT(thd_ndb->m_handler == NULL);
7620  const void *key= handler->m_share;
7621  HASH_SEARCH_STATE state;
7622  THD_NDB_SHARE *thd_ndb_share=
7623  (THD_NDB_SHARE*)my_hash_first(&thd_ndb->open_tables,
7624  (const uchar *)&key, sizeof(key),
7625  &state);
7626  while (thd_ndb_share && thd_ndb_share->key != key)
7627  {
7628  thd_ndb_share=
7629  (THD_NDB_SHARE*)my_hash_next(&thd_ndb->open_tables,
7630  (const uchar *)&key, sizeof(key),
7631  &state);
7632  }
7633  if (thd_ndb_share == 0)
7634  {
7635  thd_ndb_share= (THD_NDB_SHARE *) alloc_root(&thd->transaction.mem_root,
7636  sizeof(THD_NDB_SHARE));
7637  if (!thd_ndb_share)
7638  {
7639  mem_alloc_error(sizeof(THD_NDB_SHARE));
7640  DBUG_RETURN(1);
7641  }
7642  thd_ndb_share->key= key;
7643  thd_ndb_share->stat.last_count= thd_ndb->count;
7644  thd_ndb_share->stat.no_uncommitted_rows_count= 0;
7645  thd_ndb_share->stat.records= ~(ha_rows)0;
7646  my_hash_insert(&thd_ndb->open_tables, (uchar *)thd_ndb_share);
7647  }
7648  else if (thd_ndb_share->stat.last_count != thd_ndb->count)
7649  {
7650  thd_ndb_share->stat.last_count= thd_ndb->count;
7651  thd_ndb_share->stat.no_uncommitted_rows_count= 0;
7652  thd_ndb_share->stat.records= ~(ha_rows)0;
7653  }
7654 
7655  handler->m_table_info= &thd_ndb_share->stat;
7656  DBUG_RETURN(0);
7657 }
7658 
7659 int ha_ndbcluster::init_handler_for_statement(THD *thd)
7660 {
7661  /*
7662  This is the place to make sure this handler instance
7663  has a started transaction.
7664 
7665  The transaction is started by the first handler on which
7666  MySQL Server calls external lock
7667 
7668  Other handlers in the same stmt or transaction should use
7669  the same NDB transaction. This is done by setting up the m_thd_ndb
7670  pointer to point to the NDB transaction object.
7671  */
7672 
7673  DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
7674  Thd_ndb *thd_ndb= m_thd_ndb;
7675  DBUG_ASSERT(thd_ndb);
7676 
7677  // store thread specific data first to set the right context
7678  m_autoincrement_prefetch= THDVAR(thd, autoincrement_prefetch_sz);
7679  // Start of transaction
7680  m_rows_changed= 0;
7681  m_blobs_pending= FALSE;
7682  release_blobs_buffer();
7683  m_slow_path= m_thd_ndb->m_slow_path;
7684 #ifdef HAVE_NDB_BINLOG
7685  if (unlikely(m_slow_path))
7686  {
7687  if (m_share == ndb_apply_status_share && thd->slave_thread)
7688  m_thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS;
7689  }
7690 #endif
7691 
7692  int ret = 0;
7693  if (thd_ndb->m_handler == 0)
7694  {
7695  DBUG_ASSERT(m_share);
7696  ret = add_handler_to_open_tables(thd, thd_ndb, this);
7697  }
7698  else
7699  {
7700  struct Ndb_local_table_statistics &stat= m_table_info_instance;
7701  stat.last_count= thd_ndb->count;
7702  stat.no_uncommitted_rows_count= 0;
7703  stat.records= ~(ha_rows)0;
7704  m_table_info= &stat;
7705  }
7706  DBUG_RETURN(ret);
7707 }
7708 
7709 int ha_ndbcluster::external_lock(THD *thd, int lock_type)
7710 {
7711  DBUG_ENTER("external_lock");
7712  if (lock_type != F_UNLCK)
7713  {
7714  int error;
7715  /*
7716  Check that this handler instance has a connection
7717  set up to the Ndb object of thd
7718  */
7719  if (check_ndb_connection(thd))
7720  DBUG_RETURN(1);
7721  Thd_ndb *thd_ndb= get_thd_ndb(thd);
7722 
7723  DBUG_PRINT("enter", ("lock_type != F_UNLCK "
7724  "this: 0x%lx thd: 0x%lx thd_ndb: %lx "
7725  "thd_ndb->lock_count: %d",
7726  (long) this, (long) thd, (long) thd_ndb,
7727  thd_ndb->lock_count));
7728 
7729  if ((error= start_statement(thd, thd_ndb,
7730  thd_ndb->lock_count++)))
7731  {
7732  thd_ndb->lock_count--;
7733  DBUG_RETURN(error);
7734  }
7735  if ((error= init_handler_for_statement(thd)))
7736  {
7737  thd_ndb->lock_count--;
7738  DBUG_RETURN(error);
7739  }
7740  DBUG_RETURN(0);
7741  }
7742  else
7743  {
7744  Thd_ndb *thd_ndb= m_thd_ndb;
7745  DBUG_ASSERT(thd_ndb);
7746 
7747  DBUG_PRINT("enter", ("lock_type == F_UNLCK "
7748  "this: 0x%lx thd: 0x%lx thd_ndb: %lx "
7749  "thd_ndb->lock_count: %d",
7750  (long) this, (long) thd, (long) thd_ndb,
7751  thd_ndb->lock_count));
7752 
7753  if (m_rows_changed && global_system_variables.query_cache_type)
7754  {
7755  DBUG_PRINT("info", ("Rows have changed"));
7756 
7757  if (thd_ndb->trans &&
7758  thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
7759  {
7760  DBUG_PRINT("info", ("Add share to list of changed tables, %p",
7761  m_share));
7762  /* NOTE push_back allocates memory using transactions mem_root! */
7763  thd_ndb->changed_tables.push_back(get_share(m_share),
7764  &thd->transaction.mem_root);
7765  }
7766 
7767  if (opt_ndb_cache_check_time)
7768  {
7769  pthread_mutex_lock(&m_share->mutex);
7770  DBUG_PRINT("info", ("Invalidating commit_count"));
7771  m_share->commit_count= 0;
7772  m_share->commit_count_lock++;
7773  pthread_mutex_unlock(&m_share->mutex);
7774  }
7775  }
7776 
7777  if (!--thd_ndb->lock_count)
7778  {
7779  DBUG_PRINT("trans", ("Last external_lock"));
7780 
 7781  if ((!(thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) &&
 7782  thd_ndb->trans)
 7783  {
 7786  /*
 7787  Unlock is done without a transaction commit / rollback.
 7788  This happens if the thread didn't update any rows;
 7789  we must in this case close the transaction to release resources
 7790  */
 7791  DBUG_PRINT("trans",("ending non-updating transaction"));
 7792  thd_ndb->ndb->closeTransaction(thd_ndb->trans);
 7793  thd_ndb->trans= NULL;
 7794  thd_ndb->m_handler= NULL;
 7796  }
 7797  }
7798  m_table_info= NULL;
7799 
 7800  /*
 7801  This is the place to make sure this handler instance
 7802  is no longer connected to the active transaction.
 7803 
 7804  And since the handler is no longer part of the transaction
 7805  it can't have open cursors, ops, queries or blobs pending.
 7806  */
7807  m_thd_ndb= NULL;
7808 
7809  if (m_active_query)
7810  DBUG_PRINT("warning", ("m_active_query != NULL"));
7811  m_active_query= NULL;
7812 
7813  if (m_active_cursor)
7814  DBUG_PRINT("warning", ("m_active_cursor != NULL"));
7815  m_active_cursor= NULL;
7816 
7817  if (m_multi_cursor)
7818  DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
7819  m_multi_cursor= NULL;
7820 
7821  if (m_blobs_pending)
7822  DBUG_PRINT("warning", ("blobs_pending != 0"));
7823  m_blobs_pending= 0;
7824 
7825  DBUG_RETURN(0);
7826  }
7827 }
7828 
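The lock_count bookkeeping above relies on external_lock() calls always arriving in pairs per handler instance. A minimal caller-side sketch, assuming a handler instance h and the usual server conventions (the trace comments are illustrative, not a verbatim trace):

  // F_WRLCK/F_RDLCK: lock_count 0 -> 1; statement/transaction set up lazily
  h->external_lock(thd, F_WRLCK);
  /* ... row operations through this handler ... */
  // F_UNLCK: lock_count 1 -> 0; a never-updating transaction is closed here
  h->external_lock(thd, F_UNLCK);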
7836 void ha_ndbcluster::unlock_row()
7837 {
7838  DBUG_ENTER("unlock_row");
7839 
7840  DBUG_PRINT("info", ("Unlocking row"));
7841  m_lock_tuple= FALSE;
7842  DBUG_VOID_RETURN;
7843 }
7844 
7869 int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
7870 {
7871  int error=0;
7872  Thd_ndb *thd_ndb;
7873  DBUG_ENTER("start_stmt");
7874  DBUG_ASSERT(thd == table->in_use);
7875 
7876  thd_ndb= get_thd_ndb(thd);
7877  if ((error= start_statement(thd, thd_ndb, thd_ndb->start_stmt_count++)))
7878  goto error;
7879  if ((error= init_handler_for_statement(thd)))
7880  goto error;
7881  DBUG_RETURN(0);
7882 error:
7883  thd_ndb->start_stmt_count--;
7884  DBUG_RETURN(error);
7885 }
 7886 
 7887 NdbTransaction *
 7888 ha_ndbcluster::start_transaction_row(const NdbRecord *ndb_record,
7889  const uchar *record,
7890  int &error)
7891 {
7892  NdbTransaction *trans;
7893  DBUG_ENTER("ha_ndbcluster::start_transaction_row");
7894  DBUG_ASSERT(m_thd_ndb);
7895  DBUG_ASSERT(m_thd_ndb->trans == NULL);
7896 
7897  transaction_checks(table->in_use, m_thd_ndb);
7898 
7899  Ndb *ndb= m_thd_ndb->ndb;
7900 
7901  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
7902  char *buf= (char*)&tmp[0];
7903  trans= ndb->startTransaction(ndb_record,
7904  (const char*)record,
7905  buf, sizeof(tmp));
7906 
7907  if (trans)
7908  {
7909  m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7910  DBUG_PRINT("info", ("Delayed allocation of TC"));
7911  DBUG_RETURN(m_thd_ndb->trans= trans);
7912  }
7913 
7914  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7915  DBUG_RETURN(NULL);
7916 }
 7917 
 7918 NdbTransaction *
 7919 ha_ndbcluster::start_transaction_key(uint inx_no,
7920  const uchar *key_data,
7921  int &error)
7922 {
7923  NdbTransaction *trans;
7924  DBUG_ENTER("ha_ndbcluster::start_transaction_key");
7925  DBUG_ASSERT(m_thd_ndb);
7926  DBUG_ASSERT(m_thd_ndb->trans == NULL);
7927 
7928  transaction_checks(table->in_use, m_thd_ndb);
7929 
7930  Ndb *ndb= m_thd_ndb->ndb;
7931  const NdbRecord *key_rec= m_index[inx_no].ndb_unique_record_key;
7932 
7933  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
7934  char *buf= (char*)&tmp[0];
7935  trans= ndb->startTransaction(key_rec,
7936  (const char*)key_data,
7937  buf, sizeof(tmp));
7938 
7939  if (trans)
7940  {
7941  m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7942  DBUG_PRINT("info", ("Delayed allocation of TC"));
7943  DBUG_RETURN(m_thd_ndb->trans= trans);
7944  }
7945 
7946  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7947  DBUG_RETURN(NULL);
7948 }
 7949 
 7950 NdbTransaction *
 7951 ha_ndbcluster::start_transaction(int &error)
7952 {
7953  NdbTransaction *trans;
7954  DBUG_ENTER("ha_ndbcluster::start_transaction");
7955 
7956  DBUG_ASSERT(m_thd_ndb);
7957  DBUG_ASSERT(m_thd_ndb->trans == NULL);
7958 
7959  transaction_checks(table->in_use, m_thd_ndb);
7960  const uint opti_node_select= THDVAR(table->in_use, optimized_node_selection);
7961  m_thd_ndb->connection->set_optimized_node_selection(opti_node_select & 1);
7962  if ((trans= m_thd_ndb->ndb->startTransaction()))
7963  {
7964  m_thd_ndb->m_transaction_no_hint_count[trans->getConnectedNodeId()]++;
7965  DBUG_PRINT("info", ("Delayed allocation of TC"));
7966  DBUG_RETURN(m_thd_ndb->trans= trans);
7967  }
7968 
7969  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7970  DBUG_RETURN(NULL);
7971 }
 7972 
 7973 NdbTransaction *
 7974 ha_ndbcluster::start_transaction_part_id(Uint32 part_id, int &error)
7975 {
7976  NdbTransaction *trans;
7977  DBUG_ENTER("ha_ndbcluster::start_transaction_part_id");
7978 
7979  DBUG_ASSERT(m_thd_ndb);
7980  DBUG_ASSERT(m_thd_ndb->trans == NULL);
7981 
7982  transaction_checks(table->in_use, m_thd_ndb);
7983  if ((trans= m_thd_ndb->ndb->startTransaction(m_table, part_id)))
7984  {
7985  m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7986  DBUG_PRINT("info", ("Delayed allocation of TC"));
7987  DBUG_RETURN(m_thd_ndb->trans= trans);
7988  }
7989 
7990  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7991  DBUG_RETURN(NULL);
7992 }
7993 
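The four start_transaction* variants above differ only in the transaction-coordinator hint they hand to the NDB API; all of them bump a hint/no-hint counter and cache the result in m_thd_ndb->trans. A condensed sketch of the three underlying NdbApi forms, assuming ndb, key_rec, key_data, tab and part_id are already in scope:

  char xfrmbuf[1024];  // scratch for hint computation (the code above sizes it
                       // from MAX_KEY_SIZE_IN_WORDS * MAX_XFRM_MULTIPLY)
  NdbTransaction *t1= ndb->startTransaction();              // no locality hint
  NdbTransaction *t2= ndb->startTransaction(key_rec,        // hint from key data
                                            (const char*)key_data,
                                            xfrmbuf, sizeof(xfrmbuf));
  NdbTransaction *t3= ndb->startTransaction(tab, part_id);  // hint by partition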
7994 
7999 int ndbcluster_commit(handlerton *hton, THD *thd, bool all)
8000 {
8001  int res= 0;
8002  Thd_ndb *thd_ndb= get_thd_ndb(thd);
8003  Ndb *ndb= thd_ndb->ndb;
8004  NdbTransaction *trans= thd_ndb->trans;
8005 
8006  DBUG_ENTER("ndbcluster_commit");
8007  DBUG_ASSERT(ndb);
8008  DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt")));
8009  thd_ndb->start_stmt_count= 0;
8010  if (trans == NULL)
8011  {
8012  DBUG_PRINT("info", ("trans == NULL"));
8013  DBUG_RETURN(0);
8014  }
8015  if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
8016  {
 8017  /*
 8018  An oddity in the handler interface is that commit on handlerton
 8019  is called to indicate end of statement only in cases where
 8020  autocommit isn't used and the all flag isn't set.
 8021 
 8022  We also leave quickly when a transaction hasn't even been started;
 8023  in that case we are safe in that no cleanup is needed, and the
 8024  MySQL Server could have handled the query without contacting the
 8025  NDB kernel.
 8026  */
8027  thd_ndb->save_point_count++;
8028  DBUG_PRINT("info", ("Commit before start or end-of-statement only"));
8029  DBUG_RETURN(0);
8030  }
8031  thd_ndb->save_point_count= 0;
8032 
8033 #ifdef HAVE_NDB_BINLOG
8034  if (unlikely(thd_ndb->m_slow_path))
8035  {
8036  if (thd->slave_thread)
8037  ndbcluster_update_apply_status
8038  (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS);
8039  }
8040 #endif /* HAVE_NDB_BINLOG */
8041 
8042  if (thd->slave_thread)
8043  {
8044  if (!g_ndb_slave_state.current_conflict_defined_op_count ||
8045  !thd_ndb->m_unsent_bytes ||
8046  !(res= execute_no_commit(thd_ndb, trans, TRUE)))
8047  res= execute_commit(thd, thd_ndb, trans, 1, TRUE);
8048 
8049  update_slave_api_stats(thd_ndb->ndb);
8050  }
8051  else
8052  {
8053  if (thd_ndb->m_handler &&
8054  thd_ndb->m_handler->m_read_before_write_removal_possible)
8055  {
8056  /*
8057  This is an autocommit involving only one table and
8058  rbwr is on, thus the transaction has already been
8059  committed in exec_bulk_update() or end_bulk_delete()
8060  */
 8061  DBUG_PRINT("info", ("autocommit+rbwr, transaction already committed"));
8062  if (trans->commitStatus() != NdbTransaction::Committed)
8063  {
 8064  sql_print_error("found uncommitted autocommit+rbwr transaction, "
8065  "commit status: %d", trans->commitStatus());
8066  abort();
8067  }
8068  }
8069  else
8070  res= execute_commit(thd, thd_ndb, trans, THDVAR(thd, force_send), FALSE);
8071  }
8072 
8073  if (res != 0)
8074  {
8075  const NdbError err= trans->getNdbError();
8076  const NdbOperation *error_op= trans->getNdbErrorOperation();
8077  res= ndb_to_mysql_error(&err);
8078  if (res != -1)
8079  ndbcluster_print_error(res, error_op);
8080  }
8081  else
8082  {
8083  /* Update shared statistics for tables inserted into / deleted from*/
8084  if (thd_ndb->m_handler && // Autocommit Txn
8085  thd_ndb->m_handler->m_share &&
8086  thd_ndb->m_handler->m_table_info)
8087  {
8088  modify_shared_stats(thd_ndb->m_handler->m_share, thd_ndb->m_handler->m_table_info);
8089  }
8090 
8091  /* Manual commit: Update all affected NDB_SHAREs found in 'open_tables' */
8092  for (uint i= 0; i<thd_ndb->open_tables.records; i++)
8093  {
8094  THD_NDB_SHARE *thd_share=
8095  (THD_NDB_SHARE*)my_hash_element(&thd_ndb->open_tables, i);
8096  modify_shared_stats((NDB_SHARE*)thd_share->key, &thd_share->stat);
8097  }
8098  }
8099 
8100  ndb->closeTransaction(trans);
8101  thd_ndb->trans= NULL;
8102  thd_ndb->m_handler= NULL;
8103 
8104  /* Clear commit_count for tables changed by transaction */
8105  NDB_SHARE* share;
8106  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
8107  while ((share= it++))
8108  {
 8109  DBUG_PRINT("info", ("Remove share from list of changed tables, %p",
8110  share));
8111  pthread_mutex_lock(&share->mutex);
8112  DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu",
8113  share->table_name, (ulong) share->commit_count));
8114  share->commit_count= 0;
8115  share->commit_count_lock++;
8116  pthread_mutex_unlock(&share->mutex);
8117  free_share(&share);
8118  }
8119  thd_ndb->changed_tables.empty();
8120 
8121  DBUG_RETURN(res);
8122 }
8123 
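ndbcluster_commit() above commits for real only when the statement end coincides with the transaction end; otherwise it just counts a save point. A compact sketch of that decision (is_real_commit is a hypothetical helper name, not part of this file):

  static bool is_real_commit(THD *thd, bool all)
  {
    /* 'all' set: explicit commit of the whole transaction.
       Otherwise a statement end inside BEGIN / not-autocommit is a no-op here. */
    return all ||
           !(thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
  }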
8124 
8129 static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all)
8130 {
8131  int res= 0;
8132  Thd_ndb *thd_ndb= get_thd_ndb(thd);
8133  Ndb *ndb= thd_ndb->ndb;
8134  NdbTransaction *trans= thd_ndb->trans;
8135 
8136  DBUG_ENTER("ndbcluster_rollback");
8137  DBUG_PRINT("enter", ("all: %d thd_ndb->save_point_count: %d",
8138  all, thd_ndb->save_point_count));
8139  DBUG_ASSERT(ndb);
8140  thd_ndb->start_stmt_count= 0;
8141  if (trans == NULL)
8142  {
8143  /* Ignore end-of-statement until real rollback or commit is called */
8144  DBUG_PRINT("info", ("trans == NULL"));
8145  DBUG_RETURN(0);
8146  }
8147  if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
8148  (thd_ndb->save_point_count > 0))
8149  {
8150  /*
8151  Ignore end-of-statement until real rollback or commit is called
8152  as ndb does not support rollback statement
8153  - mark that rollback was unsuccessful, this will cause full rollback
8154  of the transaction
8155  */
8156  DBUG_PRINT("info", ("Rollback before start or end-of-statement only"));
8157  mark_transaction_to_rollback(thd, 1);
8158  my_error(ER_WARN_ENGINE_TRANSACTION_ROLLBACK, MYF(0), "NDB");
8159  DBUG_RETURN(0);
8160  }
8161  thd_ndb->save_point_count= 0;
8162  if (thd->slave_thread)
8163  g_ndb_slave_state.atTransactionAbort();
8164  thd_ndb->m_unsent_bytes= 0;
8165  thd_ndb->m_execute_count++;
8166  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
8167  if (trans->execute(NdbTransaction::Rollback) != 0)
8168  {
8169  const NdbError err= trans->getNdbError();
8170  const NdbOperation *error_op= trans->getNdbErrorOperation();
8171  res= ndb_to_mysql_error(&err);
8172  if (res != -1)
8173  ndbcluster_print_error(res, error_op);
8174  }
8175  ndb->closeTransaction(trans);
8176  thd_ndb->trans= NULL;
8177  thd_ndb->m_handler= NULL;
8178 
8179  /* Clear list of tables changed by transaction */
8180  NDB_SHARE* share;
8181  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
8182  while ((share= it++))
8183  {
 8184  DBUG_PRINT("info", ("Remove share from list of changed tables, %p",
8185  share));
8186  free_share(&share);
8187  }
8188  thd_ndb->changed_tables.empty();
8189 
8190  if (thd->slave_thread)
8191  update_slave_api_stats(thd_ndb->ndb);
8192 
8193  DBUG_RETURN(res);
8194 }
8195 
8200 struct NDB_Modifier
8201 {
8202  enum { M_BOOL } m_type;
8203  const char * m_name;
8204  size_t m_name_len;
8205  bool m_found;
8206  union {
8207  bool m_val_bool;
8208 #ifdef TODO__
8209  int m_val_int;
8210  struct {
8211  const char * str;
8212  size_t len;
8213  } m_val_str;
8214 #endif
8215  };
8216 };
8217 
8218 static const
8219 struct NDB_Modifier ndb_table_modifiers[] =
8220 {
8221  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("NOLOGGING"), 0, {0} },
8222  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
8223 };
8224 
8225 static const
8226 struct NDB_Modifier ndb_column_modifiers[] =
8227 {
8228  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("MAX_BLOB_PART_SIZE"), 0, {0} },
8229  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
8230 };
8231 
8238 class NDB_Modifiers
8239 {
8240 public:
8241  NDB_Modifiers(const NDB_Modifier modifiers[]);
8242  ~NDB_Modifiers();
8243 
8247  int parse(THD* thd, const char * prefix, const char * str, size_t strlen);
8248 
8252  const NDB_Modifier * get(const char * name) const;
8253 private:
8254  uint m_len;
8255  struct NDB_Modifier * m_modifiers;
8256 
8257  int parse_modifier(THD *thd, const char * prefix,
8258  struct NDB_Modifier* m, const char * str);
8259 };
8260 
8261 static
8262 bool
8263 end_of_token(const char * str)
8264 {
8265  return str[0] == 0 || str[0] == ' ' || str[0] == ',';
8266 }
8267 
8268 NDB_Modifiers::NDB_Modifiers(const NDB_Modifier modifiers[])
8269 {
8270  for (m_len = 0; modifiers[m_len].m_name != 0; m_len++)
8271  {}
8272  m_modifiers = new NDB_Modifier[m_len];
8273  memcpy(m_modifiers, modifiers, m_len * sizeof(NDB_Modifier));
8274 }
8275 
8276 NDB_Modifiers::~NDB_Modifiers()
8277 {
8278  delete [] m_modifiers;
8279 }
8280 
8281 int
8282 NDB_Modifiers::parse_modifier(THD *thd,
8283  const char * prefix,
8284  struct NDB_Modifier* m,
8285  const char * str)
8286 {
8287  if (m->m_found)
8288  {
8289  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8290  ER_ILLEGAL_HA_CREATE_OPTION,
8291  "%s : modifier %s specified twice",
8292  prefix, m->m_name);
8293  }
8294 
8295  switch(m->m_type){
8296  case NDB_Modifier::M_BOOL:
8297  if (end_of_token(str))
8298  {
8299  m->m_val_bool = true;
8300  goto found;
8301  }
8302  if (str[0] != '=')
8303  break;
8304 
8305  str++;
8306  if (str[0] == '1' && end_of_token(str+1))
8307  {
8308  m->m_val_bool = true;
8309  goto found;
8310  }
8311 
8312  if (str[0] == '0' && end_of_token(str+1))
8313  {
8314  m->m_val_bool = false;
8315  goto found;
8316  }
8317  }
8318 
8319  {
8320  const char * end = strpbrk(str, " ,");
8321  if (end)
8322  {
8323  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8324  ER_ILLEGAL_HA_CREATE_OPTION,
8325  "%s : invalid value '%.*s' for %s",
8326  prefix, (int)(end - str), str, m->m_name);
8327  }
8328  else
8329  {
8330  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8331  ER_ILLEGAL_HA_CREATE_OPTION,
8332  "%s : invalid value '%s' for %s",
8333  prefix, str, m->m_name);
8334  }
8335  }
8336  return -1;
8337 found:
8338  m->m_found = true;
8339  return 0;
8340 }
8341 
8342 int
8343 NDB_Modifiers::parse(THD *thd,
8344  const char * prefix,
8345  const char * _source,
8346  size_t _source_len)
8347 {
8348  if (_source == 0 || _source_len == 0)
8349  return 0;
8350 
8351  const char * source = 0;
8352 
8356  for (size_t i = 0; i<_source_len; i++)
8357  {
8358  if (_source[i] == 0)
8359  {
8360  source = _source;
8361  break;
8362  }
8363  }
8364 
8365  if (source == 0)
 8366  {
 8367  /*
 8368  _source is not NUL terminated, make a terminated copy
 8369  */
 8370  char * tmp = new char[_source_len+1];
8371  if (tmp == 0)
8372  {
8373  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8374  ER_ILLEGAL_HA_CREATE_OPTION,
8375  "%s : unable to parse due to out of memory",
8376  prefix);
8377  return -1;
8378  }
8379  memcpy(tmp, _source, _source_len);
8380  tmp[_source_len] = 0;
8381  source = tmp;
8382  }
8383 
8384  const char * pos = source;
8385  if ((pos = strstr(pos, prefix)) == 0)
8386  {
8387  if (source != _source)
8388  delete [] source;
8389  return 0;
8390  }
8391 
8392  pos += strlen(prefix);
8393 
8394  while (pos && pos[0] != 0 && pos[0] != ' ')
8395  {
8396  const char * end = strpbrk(pos, " ,"); // end of current modifier
8397 
8398  for (uint i = 0; i < m_len; i++)
8399  {
8400  size_t l = m_modifiers[i].m_name_len;
8401  if (strncmp(pos, m_modifiers[i].m_name, l) == 0)
 8402  {
 8403  /*
 8404  Found a matching modifier name prefix; accept it only
 8405  if it is followed by end-of-token or '='
 8406  */
 8407  if (! (end_of_token(pos + l) || pos[l] == '='))
8408  goto unknown;
8409 
8410  pos += l;
8411  int res = parse_modifier(thd, prefix, m_modifiers+i, pos);
8412 
8413  if (res == -1)
 8414  {
 8415  /*
 8416  Invalid value; a warning has been pushed, continue parsing
 8417  */
 8418  }
8419 
8420  goto next;
8421  }
8422  }
8423 
8424  {
8425  unknown:
8426  if (end)
8427  {
8428  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8429  ER_ILLEGAL_HA_CREATE_OPTION,
8430  "%s : unknown modifier: %.*s",
8431  prefix, (int)(end - pos), pos);
8432  }
8433  else
8434  {
8435  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8436  ER_ILLEGAL_HA_CREATE_OPTION,
8437  "%s : unknown modifier: %s",
8438  prefix, pos);
8439  }
8440  }
8441 
8442 next:
8443  pos = end;
8444  if (pos && pos[0] == ',')
8445  pos++;
8446  }
8447 
8448  if (source != _source)
8449  delete [] source;
8450 
8451  return 0;
8452 }
8453 
8454 const NDB_Modifier *
8455 NDB_Modifiers::get(const char * name) const
8456 {
8457  for (uint i = 0; i < m_len; i++)
8458  {
8459  if (strcmp(name, m_modifiers[i].m_name) == 0)
8460  {
8461  return m_modifiers + i;
8462  }
8463  }
8464  return 0;
8465 }
8466 
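NDB_Modifiers is driven purely by the COMMENT string of a table or column; a modifier is NAME, NAME=1 or NAME=0, comma separated after the prefix. A short usage sketch under the definitions above (the comment literal is an invented example, thd is assumed in scope):

  NDB_Modifiers table_modifiers(ndb_table_modifiers);
  const char comment[]= "NDB_TABLE=NOLOGGING=1";
  table_modifiers.parse(thd, "NDB_TABLE=", comment, sizeof(comment));
  const NDB_Modifier *mod= table_modifiers.get("NOLOGGING");
  if (mod->m_found && mod->m_val_bool)
  {
    /* create() below reacts to this with tab.setLogging(FALSE) */
  }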
8482 static bool
8483 ndb_blob_striping()
8484 {
8485 #ifndef DBUG_OFF
8486  const char* p= getenv("NDB_BLOB_STRIPING");
8487  if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
8488  return true;
8489 #endif
8490  return false;
8491 }
8492 
8493 #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
8494 const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = 2013;
8495 #else
8496 const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = NDB_MAX_TUPLE_SIZE_IN_WORDS;
8497 #endif
8498 
8499 static int create_ndb_column(THD *thd,
8500  NDBCOL &col,
8501  Field *field,
8502  HA_CREATE_INFO *create_info
8503 #ifndef NDB_WITHOUT_COLUMN_FORMAT
8504  , column_format_type
8505  default_format= COLUMN_FORMAT_TYPE_DEFAULT
8506 #endif
8507  )
8508 {
8509  NDBCOL::StorageType type= NDBCOL::StorageTypeMemory;
8510  bool dynamic= FALSE;
8511 
8512  char buf[MAX_ATTR_DEFAULT_VALUE_SIZE];
8513  DBUG_ENTER("create_ndb_column");
8514  // Set name
8515  if (col.setName(field->field_name))
8516  {
8517  DBUG_RETURN(my_errno= errno);
8518  }
8519  // Get char set
8520  CHARSET_INFO *cs= const_cast<CHARSET_INFO*>(field->charset());
8521  // Set type and sizes
8522  const enum enum_field_types mysql_type= field->real_type();
8523 
8524  NDB_Modifiers column_modifiers(ndb_column_modifiers);
8525  column_modifiers.parse(thd, "NDB_COLUMN=",
8526  field->comment.str,
8527  field->comment.length);
8528 
8529  const NDB_Modifier * mod_maxblob = column_modifiers.get("MAX_BLOB_PART_SIZE");
8530 
8531  {
8532  /* Clear default value (col obj is reused for whole table def) */
8533  col.setDefaultValue(NULL, 0);
8534 
8535  /* If the data nodes are capable then set native
8536  * default.
8537  */
8538  bool nativeDefaults =
8539  ! (thd &&
8540  (! ndb_native_default_support(get_thd_ndb(thd)->
8541  ndb->getMinDbNodeVersion())));
8542 
8543  if (likely( nativeDefaults ))
8544  {
8545  if ((!(field->flags & PRI_KEY_FLAG) ) &&
8546  type_supports_default_value(mysql_type))
8547  {
8548  if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
8549  {
8550  my_ptrdiff_t src_offset= field->table->s->default_values
8551  - field->table->record[0];
8552  if ((! field->is_real_null(src_offset)) ||
8553  ((field->flags & NOT_NULL_FLAG)))
8554  {
8555  /* Set a non-null native default */
8556  memset(buf, 0, MAX_ATTR_DEFAULT_VALUE_SIZE);
8557  get_default_value(buf, field);
8558 
 8559  /* For bit columns, the default length is rounded up to the
 8560  nearest word, ensuring all data is sent
 8561  */
8562  Uint32 defaultLen = field_used_length(field);
8563  if(field->type() == MYSQL_TYPE_BIT)
8564  defaultLen = ((defaultLen + 3) /4) * 4;
8565  col.setDefaultValue(buf, defaultLen);
8566  }
8567  }
8568  }
8569  }
8570  }
8571  switch (mysql_type) {
8572  // Numeric types
8573  case MYSQL_TYPE_TINY:
 8574  if (field->flags & UNSIGNED_FLAG)
 8575  col.setType(NDBCOL::Tinyunsigned);
 8576  else
8577  col.setType(NDBCOL::Tinyint);
8578  col.setLength(1);
8579  break;
8580  case MYSQL_TYPE_SHORT:
 8581  if (field->flags & UNSIGNED_FLAG)
 8582  col.setType(NDBCOL::Smallunsigned);
 8583  else
 8584  col.setType(NDBCOL::Smallint);
8585  col.setLength(1);
8586  break;
8587  case MYSQL_TYPE_LONG:
 8588  if (field->flags & UNSIGNED_FLAG)
 8589  col.setType(NDBCOL::Unsigned);
 8590  else
8591  col.setType(NDBCOL::Int);
8592  col.setLength(1);
8593  break;
8594  case MYSQL_TYPE_INT24:
 8595  if (field->flags & UNSIGNED_FLAG)
 8596  col.setType(NDBCOL::Mediumunsigned);
 8597  else
 8598  col.setType(NDBCOL::Mediumint);
8599  col.setLength(1);
8600  break;
8601  case MYSQL_TYPE_LONGLONG:
 8602  if (field->flags & UNSIGNED_FLAG)
 8603  col.setType(NDBCOL::Bigunsigned);
 8604  else
8605  col.setType(NDBCOL::Bigint);
8606  col.setLength(1);
8607  break;
8608  case MYSQL_TYPE_FLOAT:
8609  col.setType(NDBCOL::Float);
8610  col.setLength(1);
8611  break;
8612  case MYSQL_TYPE_DOUBLE:
8613  col.setType(NDBCOL::Double);
8614  col.setLength(1);
8615  break;
8616  case MYSQL_TYPE_DECIMAL:
8617  {
8618  Field_decimal *f= (Field_decimal*)field;
8619  uint precision= f->pack_length();
8620  uint scale= f->decimals();
8621  if (field->flags & UNSIGNED_FLAG)
8622  {
8623  col.setType(NDBCOL::Olddecimalunsigned);
8624  precision-= (scale > 0);
8625  }
8626  else
 8627  {
 8628  col.setType(NDBCOL::Olddecimal);
 8629  precision-= 1 + (scale > 0);
8630  }
8631  col.setPrecision(precision);
8632  col.setScale(scale);
8633  col.setLength(1);
8634  }
8635  break;
8636  case MYSQL_TYPE_NEWDECIMAL:
 8637  {
 8638  Field_new_decimal *f= (Field_new_decimal*)field;
 8639  uint precision= f->precision;
8640  uint scale= f->decimals();
8641  if (field->flags & UNSIGNED_FLAG)
8642  {
8643  col.setType(NDBCOL::Decimalunsigned);
8644  }
8645  else
8646  {
8647  col.setType(NDBCOL::Decimal);
8648  }
8649  col.setPrecision(precision);
8650  col.setScale(scale);
8651  col.setLength(1);
8652  }
8653  break;
8654  // Date types
 8655  case MYSQL_TYPE_DATETIME:
 8656  col.setType(NDBCOL::Datetime);
 8657  col.setLength(1);
8658  break;
8659  case MYSQL_TYPE_DATE: // ?
8660  col.setType(NDBCOL::Char);
8661  col.setLength(field->pack_length());
8662  break;
8663  case MYSQL_TYPE_NEWDATE:
8664  col.setType(NDBCOL::Date);
8665  col.setLength(1);
8666  break;
8667  case MYSQL_TYPE_TIME:
8668  col.setType(NDBCOL::Time);
8669  col.setLength(1);
8670  break;
8671  case MYSQL_TYPE_YEAR:
8672  col.setType(NDBCOL::Year);
8673  col.setLength(1);
8674  break;
 8675  case MYSQL_TYPE_TIMESTAMP:
 8676  col.setType(NDBCOL::Timestamp);
 8677  col.setLength(1);
8678  break;
8679  // Char types
8680  case MYSQL_TYPE_STRING:
8681  if (field->pack_length() == 0)
8682  {
8683  col.setType(NDBCOL::Bit);
8684  col.setLength(1);
8685  }
8686  else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8687  {
8688  col.setType(NDBCOL::Binary);
8689  col.setLength(field->pack_length());
8690  }
8691  else
8692  {
8693  col.setType(NDBCOL::Char);
8694  col.setCharset(cs);
8695  col.setLength(field->pack_length());
8696  }
8697  break;
8698  case MYSQL_TYPE_VAR_STRING: // ?
8699  case MYSQL_TYPE_VARCHAR:
8700  {
8701  Field_varstring* f= (Field_varstring*)field;
8702  if (f->length_bytes == 1)
8703  {
 8704  if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
 8705  col.setType(NDBCOL::Varbinary);
 8706  else {
8707  col.setType(NDBCOL::Varchar);
8708  col.setCharset(cs);
8709  }
8710  }
8711  else if (f->length_bytes == 2)
8712  {
 8713  if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
 8714  col.setType(NDBCOL::Longvarbinary);
 8715  else {
 8716  col.setType(NDBCOL::Longvarchar);
 8717  col.setCharset(cs);
8718  }
8719  }
8720  else
8721  {
8722  DBUG_RETURN(HA_ERR_UNSUPPORTED);
8723  }
8724  col.setLength(field->field_length);
8725  }
8726  break;
8727  // Blob types (all come in as MYSQL_TYPE_BLOB)
8728  mysql_type_tiny_blob:
8729  case MYSQL_TYPE_TINY_BLOB:
8730  if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8731  col.setType(NDBCOL::Blob);
8732  else {
8733  col.setType(NDBCOL::Text);
8734  col.setCharset(cs);
8735  }
8736  col.setInlineSize(256);
8737  // No parts
8738  col.setPartSize(0);
8739  col.setStripeSize(ndb_blob_striping() ? 0 : 0);
8740  break;
8741  //mysql_type_blob:
8742  case MYSQL_TYPE_GEOMETRY:
8743  case MYSQL_TYPE_BLOB:
8744  if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8745  col.setType(NDBCOL::Blob);
8746  else {
8747  col.setType(NDBCOL::Text);
8748  col.setCharset(cs);
8749  }
8750  {
8751  Field_blob *field_blob= (Field_blob *)field;
8752  /*
 8753  * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, regular, medium blobs.
 8754  * Tinyblob gets no blob parts. The other cases are just a crude
 8755  * way to control part size and striping.
 8756  *
 8757  * In mysql, blob(256) is promoted to blob(65535), so it does not
 8758  * in fact fit "inline" in NDB.
8759  */
8760  if (field_blob->max_data_length() < (1 << 8))
8761  goto mysql_type_tiny_blob;
8762  else if (field_blob->max_data_length() < (1 << 16))
8763  {
8764  col.setInlineSize(256);
8765  col.setPartSize(2000);
8766  col.setStripeSize(ndb_blob_striping() ? 16 : 0);
8767  if (mod_maxblob->m_found)
8768  {
 8769  col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8770  }
8771  }
8772  else if (field_blob->max_data_length() < (1 << 24))
8773  goto mysql_type_medium_blob;
8774  else
8775  goto mysql_type_long_blob;
8776  }
8777  break;
8778  mysql_type_medium_blob:
8779  case MYSQL_TYPE_MEDIUM_BLOB:
8780  if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8781  col.setType(NDBCOL::Blob);
8782  else {
8783  col.setType(NDBCOL::Text);
8784  col.setCharset(cs);
8785  }
8786  col.setInlineSize(256);
8787  col.setPartSize(4000);
8788  col.setStripeSize(ndb_blob_striping() ? 8 : 0);
8789  if (mod_maxblob->m_found)
8790  {
 8791  col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8792  }
8793  break;
8794  mysql_type_long_blob:
8795  case MYSQL_TYPE_LONG_BLOB:
8796  if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8797  col.setType(NDBCOL::Blob);
8798  else {
8799  col.setType(NDBCOL::Text);
8800  col.setCharset(cs);
8801  }
8802  col.setInlineSize(256);
 8803  col.setPartSize(4 * (OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8804  col.setStripeSize(ndb_blob_striping() ? 4 : 0);
8805  if (mod_maxblob->m_found)
8806  {
 8807  col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8808  }
8809  break;
8810  // Other types
8811  case MYSQL_TYPE_ENUM:
8812  col.setType(NDBCOL::Char);
8813  col.setLength(field->pack_length());
8814  break;
8815  case MYSQL_TYPE_SET:
8816  col.setType(NDBCOL::Char);
8817  col.setLength(field->pack_length());
8818  break;
8819  case MYSQL_TYPE_BIT:
8820  {
8821  int no_of_bits= field->field_length;
8822  col.setType(NDBCOL::Bit);
8823  if (!no_of_bits)
8824  col.setLength(1);
8825  else
8826  col.setLength(no_of_bits);
8827  break;
8828  }
8829  case MYSQL_TYPE_NULL:
8830  goto mysql_type_unsupported;
8831  mysql_type_unsupported:
8832  default:
8833  DBUG_RETURN(HA_ERR_UNSUPPORTED);
8834  }
8835  // Set nullable and pk
8836  col.setNullable(field->maybe_null());
8837  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
8838  if ((field->flags & FIELD_IN_PART_FUNC_FLAG) != 0)
8839  {
8840  col.setPartitionKey(TRUE);
8841  }
8842 
8843  // Set autoincrement
8844  if (field->flags & AUTO_INCREMENT_FLAG)
8845  {
8846 #ifndef DBUG_OFF
8847  char buff[22];
8848 #endif
8849  col.setAutoIncrement(TRUE);
8850  ulonglong value= create_info->auto_increment_value ?
8851  create_info->auto_increment_value : (ulonglong) 1;
8852  DBUG_PRINT("info", ("Autoincrement key, initial: %s", llstr(value, buff)));
8853  col.setAutoIncrementInitialValue(value);
8854  }
8855  else
8856  col.setAutoIncrement(FALSE);
8857 
8858 #ifndef NDB_WITHOUT_COLUMN_FORMAT
8859  DBUG_PRINT("info", ("storage: %u format: %u ",
8860  field->field_storage_type(),
8861  field->column_format()));
8862  switch (field->field_storage_type()) {
8863  case(HA_SM_DEFAULT):
8864  default:
8865  if (create_info->storage_media == HA_SM_DISK)
8866  type= NDBCOL::StorageTypeDisk;
8867  else
8868  type= NDBCOL::StorageTypeMemory;
8869  break;
8870  case(HA_SM_DISK):
8871  type= NDBCOL::StorageTypeDisk;
8872  break;
8873  case(HA_SM_MEMORY):
8874  type= NDBCOL::StorageTypeMemory;
8875  break;
8876  }
8877 
8878  switch (field->column_format()) {
8879  case(COLUMN_FORMAT_TYPE_FIXED):
8880  dynamic= FALSE;
8881  break;
8882  case(COLUMN_FORMAT_TYPE_DYNAMIC):
8883  dynamic= TRUE;
8884  break;
8885  case(COLUMN_FORMAT_TYPE_DEFAULT):
8886  default:
8887  if (create_info->row_type == ROW_TYPE_DEFAULT)
8888  dynamic= default_format;
8889  else
8890  dynamic= (create_info->row_type == ROW_TYPE_DYNAMIC);
8891  break;
8892  }
8893 #endif
8894  DBUG_PRINT("info", ("Column %s is declared %s", field->field_name,
8895  (dynamic) ? "dynamic" : "static"));
8896  if (type == NDBCOL::StorageTypeDisk)
8897  {
8898  if (dynamic)
8899  {
8900  DBUG_PRINT("info", ("Dynamic disk stored column %s changed to static",
8901  field->field_name));
8902  dynamic= false;
8903  }
8904 
8905 #ifndef NDB_WITHOUT_COLUMN_FORMAT
8906  if (thd && field->column_format() == COLUMN_FORMAT_TYPE_DYNAMIC)
8907  {
8908  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8909  ER_ILLEGAL_HA_CREATE_OPTION,
8910  "DYNAMIC column %s with "
8911  "STORAGE DISK is not supported, "
8912  "column will become FIXED",
8913  field->field_name);
8914  }
8915 #endif
8916  }
8917 
8918  switch (create_info->row_type) {
8919  case ROW_TYPE_FIXED:
8920  if (thd && (dynamic || field_type_forces_var_part(field->type())))
8921  {
8922  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8923  ER_ILLEGAL_HA_CREATE_OPTION,
8924  "Row format FIXED incompatible with "
8925  "dynamic attribute %s",
8926  field->field_name);
8927  }
8928  break;
8929  case ROW_TYPE_DYNAMIC:
8930  /*
8931  Future: make columns dynamic in this case
8932  */
8933  break;
8934  default:
8935  break;
8936  }
8937 
8938  DBUG_PRINT("info", ("Format %s, Storage %s", (dynamic)?"dynamic":"fixed",(type == NDBCOL::StorageTypeDisk)?"disk":"memory"));
8939  col.setStorageType(type);
8940  col.setDynamic(dynamic);
8941 
8942  DBUG_RETURN(0);
8943 }
8944 
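The MAX_BLOB_PART_SIZE modifier handled in create_ndb_column() above trades upgrade/downgrade safety for blob-part capacity. A worked sketch of the arithmetic, using the 2013-word legacy limit quoted earlier in this file (the variable names here are illustrative):

  const unsigned safety_words= 13;
  // default long-blob part, sized against the legacy tuple limit:
  //   4 * (2013 - 13) = 8000 bytes per part
  const unsigned legacy_part_bytes= 4 * (2013 - safety_words);
  // with NDB_COLUMN=MAX_BLOB_PART_SIZE, sized against the current limit:
  const unsigned max_part_bytes=
      4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - safety_words);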
8945 void ha_ndbcluster::update_create_info(HA_CREATE_INFO *create_info)
8946 {
8947  DBUG_ENTER("ha_ndbcluster::update_create_info");
8948  THD *thd= current_thd;
8949  const NDBTAB *ndbtab= m_table;
8950  Ndb *ndb= check_ndb_in_thd(thd);
8951 
8952  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
8953  {
8954  /*
8955  Find any initial auto_increment value
8956  */
8957  for (uint i= 0; i < table->s->fields; i++)
8958  {
8959  Field *field= table->field[i];
8960  if (field->flags & AUTO_INCREMENT_FLAG)
8961  {
8962  ulonglong auto_value;
8963  uint retries= NDB_AUTO_INCREMENT_RETRIES;
8964  int retry_sleep= 30; /* 30 milliseconds, transaction */
8965  for (;;)
8966  {
8967  Ndb_tuple_id_range_guard g(m_share);
8968  if (ndb->readAutoIncrementValue(ndbtab, g.range, auto_value))
8969  {
 8970  if (--retries && !thd->killed &&
 8971  ndb->getNdbError().status == NdbError::TemporaryError)
 8972  {
8973  do_retry_sleep(retry_sleep);
8974  continue;
8975  }
8976  const NdbError err= ndb->getNdbError();
8977  sql_print_error("Error %lu in ::update_create_info(): %s",
8978  (ulong) err.code, err.message);
8979  DBUG_VOID_RETURN;
8980  }
8981  break;
8982  }
8983  if (auto_value > 1)
8984  {
8985  create_info->auto_increment_value= auto_value;
8986  }
8987  break;
8988  }
8989  }
8990  }
8991 
8992  DBUG_VOID_RETURN;
8993 }
8994 
8995 /*
8996  Create a table in NDB Cluster
8997  */
8998 static uint get_no_fragments(ulonglong max_rows)
8999 {
9000  ulonglong acc_row_size= 25 + /*safety margin*/ 2;
9001  ulonglong acc_fragment_size= 512*1024*1024;
9002  return uint((max_rows*acc_row_size)/acc_fragment_size)+1;
9003 }
9004 
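get_no_fragments() sizes the table from a worst-case ACC (index memory) footprint of 27 bytes per row and 512 MB per fragment. A worked example with an assumed row count:

  //   max_rows      = 100 000 000 (assumed)
  //   acc bytes     = 100 000 000 * 27 = 2 700 000 000
  //   fragment size = 512 * 1024 * 1024 = 536 870 912
  //   fragments     = uint(2 700 000 000 / 536 870 912) + 1 = 5 + 1 = 6
  uint frags= get_no_fragments(100000000ULL);  // == 6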
9005 
 9006 /*
 9007  Routine to adjust the default number of partitions to always be a multiple
 9008  of the number of nodes, and never more than 4 times the number of nodes.
 9009 
 9010 */
9011 static
9012 bool
9013 adjusted_frag_count(Ndb* ndb,
9014  uint requested_frags,
9015  uint &reported_frags)
9016 {
9017  unsigned no_nodes= g_ndb_cluster_connection->no_db_nodes();
9018  unsigned no_replicas= no_nodes == 1 ? 1 : 2;
9019 
9020  unsigned no_threads= 1;
9021  const unsigned no_nodegroups= g_ndb_cluster_connection->max_nodegroup() + 1;
9022 
 9023  {
 9024  /*
 9025  Use SYSTAB_0 to find the actual replica count and #threads
 9026  */
 9027  char dbname[FN_HEADLEN+1];
9028  dbname[FN_HEADLEN]= 0;
9029  strnmov(dbname, ndb->getDatabaseName(), sizeof(dbname) - 1);
9030  ndb->setDatabaseName("sys");
9031  Ndb_table_guard ndbtab_g(ndb->getDictionary(), "SYSTAB_0");
9032  const NdbDictionary::Table * tab = ndbtab_g.get_table();
9033  if (tab)
9034  {
9035  no_replicas= ndbtab_g.get_table()->getReplicaCount();
 9036 
 9037  /*
 9038  Guess #threads from the number of primary replicas on one node
 9039  */
 9040  {
9041  const Uint32 frags = tab->getFragmentCount();
9042  Uint32 node = 0;
9043  Uint32 cnt = 0;
9044  for (Uint32 i = 0; i<frags; i++)
9045  {
9046  Uint32 replicas[4];
9047  if (tab->getFragmentNodes(i, replicas, NDB_ARRAY_SIZE(replicas)))
9048  {
9049  if (node == replicas[0] || node == 0)
9050  {
9051  node = replicas[0];
9052  cnt ++;
9053  }
9054  }
9055  }
 9056  no_threads = cnt; // Number of primary replicas on one node
9057  }
9058  }
9059  ndb->setDatabaseName(dbname);
9060  }
9061 
9062  const unsigned usable_nodes = no_replicas * no_nodegroups;
9063  const uint max_replicas = 8 * usable_nodes * no_threads;
9064 
 9065  reported_frags = usable_nodes * no_threads; // Start with 1 frag per thread
9066  Uint32 replicas = reported_frags * no_replicas;
 9067 
 9068  /*
 9069  Grow until the requested count is met, without exceeding max_replicas
 9070  */
 9071  while (reported_frags < requested_frags &&
9072  (replicas + usable_nodes * no_threads * no_replicas) <= max_replicas)
9073  {
9074  reported_frags += usable_nodes * no_threads;
9075  replicas += usable_nodes * no_threads * no_replicas;
9076  }
9077 
9078  return (reported_frags < requested_frags);
9079 }
9080 
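adjusted_frag_count() grows the fragment count in steps of one fragment per usable node (times LDM threads) until the request is met or the budget of 8 replicas per node and thread is exhausted. A worked trace under assumed cluster values (ndb assumed in scope):

  //   assumed: 2 node groups, 2 replicas, 1 thread
  //   usable_nodes = 2 * 2 = 4, max_replicas = 8 * 4 * 1 = 32
  //   requested_frags = 10
  //   start : reported = 4,  replicas = 8
  //   step 1: reported = 8,  replicas = 16   (4 < 10, 8  + 8 <= 32)
  //   step 2: reported = 12, replicas = 24   (8 < 10, 16 + 8 <= 32)
  //   stop  : 12 >= 10, returns false (request satisfied, rounded up)
  uint reported_frags;
  bool capped= adjusted_frag_count(ndb, 10, reported_frags); // reported == 12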
9081 
9086 int ha_ndbcluster::create(const char *name,
9087  TABLE *form,
9088  HA_CREATE_INFO *create_info)
9089 {
9090  THD *thd= current_thd;
9091  NDBTAB tab;
9092  NDBCOL col;
9093  size_t pack_length, length;
9094  uint i, pk_length= 0;
9095  uchar *data= NULL, *pack_data= NULL;
9096  bool create_temporary= (create_info->options & HA_LEX_CREATE_TMP_TABLE);
9097  bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
9098  bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
9099  bool use_disk= FALSE;
9100  NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
9101  bool ndb_sys_table= FALSE;
 9102  int result= 0;
 9103  NdbDictionary::ObjectId objId;
 9104 
9105  DBUG_ENTER("ha_ndbcluster::create");
9106  DBUG_PRINT("enter", ("name: %s", name));
9107 
9108  if (create_temporary)
9109  {
9110  /*
9111  Ndb does not support temporary tables
9112  */
9113  my_errno= ER_ILLEGAL_HA_CREATE_OPTION;
9114  DBUG_PRINT("info", ("Ndb doesn't support temporary tables"));
9115  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9116  ER_ILLEGAL_HA_CREATE_OPTION,
9117  "Ndb doesn't support temporary tables");
9118  DBUG_RETURN(my_errno);
9119  }
9120 
9121  DBUG_ASSERT(*fn_rext((char*)name) == 0);
9122  set_dbname(name);
9123  set_tabname(name);
9124 
9125  if ((my_errno= check_ndb_connection(thd)))
9126  DBUG_RETURN(my_errno);
9127 
9128  Ndb *ndb= get_ndb(thd);
9129  NDBDICT *dict= ndb->getDictionary();
9130 
9131  table= form;
9132  if (create_from_engine)
9133  {
9134  /*
9135  Table already exists in NDB and frm file has been created by
9136  caller.
9137  Do Ndb specific stuff, such as create a .ndb file
9138  */
9139  if ((my_errno= write_ndb_file(name)))
9140  DBUG_RETURN(my_errno);
9141 
9142  ndbcluster_create_binlog_setup(thd, ndb, name, strlen(name),
9143  m_dbname, m_tabname, form);
9144  DBUG_RETURN(my_errno);
9145  }
9146 
9147  Thd_ndb *thd_ndb= get_thd_ndb(thd);
9148 
9149  if (!((thd_ndb->options & TNO_NO_LOCK_SCHEMA_OP) ||
9150  thd_ndb->has_required_global_schema_lock("ha_ndbcluster::create")))
9151 
9152  DBUG_RETURN(HA_ERR_NO_CONNECTION);
9153 
9154  /*
9155  Don't allow table creation unless
9156  schema distribution table is setup
9157  ( unless it is a creation of the schema dist table itself )
9158  */
9159  if (!ndb_schema_share)
9160  {
9161  if (!(strcmp(m_dbname, NDB_REP_DB) == 0 &&
9162  strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
9163  {
9164  DBUG_PRINT("info", ("Schema distribution table not setup"));
9165  DBUG_RETURN(HA_ERR_NO_CONNECTION);
9166  }
9167  single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite;
9168  ndb_sys_table= TRUE;
9169  }
9170 
9171  if (!ndb_apply_status_share)
9172  {
9173  if ((strcmp(m_dbname, NDB_REP_DB) == 0 &&
9174  strcmp(m_tabname, NDB_APPLY_TABLE) == 0))
9175  {
9176  ndb_sys_table= TRUE;
9177  }
9178  }
9179 
9180  if (is_truncate)
9181  {
9182  Ndb_table_guard ndbtab_g(dict);
9183  ndbtab_g.init(m_tabname);
9184  if (!(m_table= ndbtab_g.get_table()))
9185  ERR_RETURN(dict->getNdbError());
9186  m_table= NULL;
9187  DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
9188  if ((my_errno= delete_table(name)))
9189  DBUG_RETURN(my_errno);
9190  ndbtab_g.reinit();
9191  }
9192 
9193  NDB_Modifiers table_modifiers(ndb_table_modifiers);
9194  table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
9195  create_info->comment.length);
9196  const NDB_Modifier * mod_nologging = table_modifiers.get("NOLOGGING");
9197 
9198 #ifdef HAVE_NDB_BINLOG
9199  /* Read ndb_replication entry for this table, if any */
9200  Uint32 binlog_flags;
9201  const st_conflict_fn_def* conflict_fn= NULL;
9202  st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
9203  Uint32 num_args = MAX_CONFLICT_ARGS;
9204 
9205  int rep_read_rc= ndbcluster_get_binlog_replication_info(thd,
9206  ndb,
9207  m_dbname,
9208  m_tabname,
9209  ::server_id,
9210  form,
9211  &binlog_flags,
9212  &conflict_fn,
9213  args,
9214  &num_args);
9215  if (rep_read_rc != 0)
9216  {
9217  DBUG_RETURN(rep_read_rc);
9218  }
9219 
9220  /* Reset database name */
9221  ndb->setDatabaseName(m_dbname);
9222 
9223  /* Use ndb_replication information as required */
9224  if (conflict_fn != NULL)
9225  {
9226  switch(conflict_fn->type)
9227  {
9228  case CFT_NDB_EPOCH:
9229  {
9230  /* Default 6 extra Gci bits allows 2^6 == 64
9231  * epochs / saveGCP, a comfortable default
9232  */
9233  Uint32 numExtraGciBits = 6;
9234  Uint32 numExtraAuthorBits = 1;
9235 
9236  if ((num_args == 1) &&
9237  (args[0].type == CFAT_EXTRA_GCI_BITS))
9238  {
9239  numExtraGciBits = args[0].extraGciBits;
9240  }
9241  DBUG_PRINT("info", ("Setting ExtraRowGciBits to %u, "
9242  "ExtraAuthorBits to %u",
9243  numExtraGciBits,
9244  numExtraAuthorBits));
9245 
9246  tab.setExtraRowGciBits(numExtraGciBits);
9247  tab.setExtraRowAuthorBits(numExtraAuthorBits);
9248  }
9249  default:
9250  break;
9251  }
9252  }
9253 #endif
9254 
9255  if ((dict->beginSchemaTrans() == -1))
9256  {
9257  DBUG_PRINT("info", ("Failed to start schema transaction"));
9258  goto err_return;
9259  }
9260  DBUG_PRINT("info", ("Started schema transaction"));
9261 
9262  DBUG_PRINT("table", ("name: %s", m_tabname));
9263  if (tab.setName(m_tabname))
9264  {
9265  my_errno= errno;
9266  goto abort;
9267  }
9268  if (!ndb_sys_table)
9269  {
9270  if (THDVAR(thd, table_temporary))
9271  {
9272 #ifdef DOES_NOT_WORK_CURRENTLY
9273  tab.setTemporary(TRUE);
9274 #endif
9275  tab.setLogging(FALSE);
9276  }
9277  else if (THDVAR(thd, table_no_logging))
9278  {
9279  tab.setLogging(FALSE);
9280  }
9281 
9282  if (mod_nologging->m_found)
9283  {
9284  tab.setLogging(!mod_nologging->m_val_bool);
9285  }
9286  }
9287  tab.setSingleUserMode(single_user_mode);
9288 
9289  // Save frm data for this table
9290  if (readfrm(name, &data, &length))
9291  {
9292  result= 1;
9293  goto abort_return;
9294  }
9295  if (packfrm(data, length, &pack_data, &pack_length))
9296  {
9297  my_free((char*)data, MYF(0));
9298  result= 2;
9299  goto abort_return;
9300  }
9301  DBUG_PRINT("info",
9302  ("setFrm data: 0x%lx len: %lu", (long) pack_data,
9303  (ulong) pack_length));
9304  tab.setFrm(pack_data, Uint32(pack_length));
9305  my_free((char*)data, MYF(0));
9306  my_free((char*)pack_data, MYF(0));
9307 
9308  /*
9309  Handle table row type
9310 
9311  Default is to let table rows have var part reference so that online
9312  add column can be performed in the future. Explicitly setting row
9313  type to fixed will omit var part reference, which will save data
9314  memory in ndb, but at the cost of not being able to online add
9315  column to this table
9316  */
9317  switch (create_info->row_type) {
9318  case ROW_TYPE_FIXED:
9319  tab.setForceVarPart(FALSE);
9320  break;
9321  case ROW_TYPE_DYNAMIC:
9322  /* fall through, treat as default */
9323  default:
9324  /* fall through, treat as default */
9325  case ROW_TYPE_DEFAULT:
9326  tab.setForceVarPart(TRUE);
9327  break;
9328  }
9329 
9330  /*
9331  Setup columns
9332  */
9333  my_bitmap_map *old_map;
9334  {
9335  restore_record(form, s->default_values);
9336  old_map= tmp_use_all_columns(form, form->read_set);
9337  }
9338 
9339  for (i= 0; i < form->s->fields; i++)
9340  {
9341  Field *field= form->field[i];
9342  DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d",
9343  field->field_name, field->real_type(),
9344  field->pack_length()));
9345  if ((my_errno= create_ndb_column(thd, col, field, create_info)))
9346  goto abort;
9347 
9348  if (!use_disk &&
9349  col.getStorageType() == NDBCOL::StorageTypeDisk)
9350  use_disk= TRUE;
9351 
9352  if (tab.addColumn(col))
9353  {
9354  my_errno= errno;
9355  goto abort;
9356  }
9357  if (col.getPrimaryKey())
9358  pk_length += (field->pack_length() + 3) / 4;
9359  }
9360 
9361  tmp_restore_column_map(form->read_set, old_map);
9362  if (use_disk)
9363  {
9364  tab.setLogging(TRUE);
9365  tab.setTemporary(FALSE);
9366  if (create_info->tablespace)
9367  tab.setTablespaceName(create_info->tablespace);
9368  else
9369  tab.setTablespaceName("DEFAULT-TS");
9370  }
9371 
9372  // Save the table level storage media setting
9373  switch(create_info->storage_media)
9374  {
9375  case HA_SM_DISK:
9376  tab.setStorageType(NdbDictionary::Column::StorageTypeDisk);
9377  break;
9378  case HA_SM_DEFAULT:
9379  tab.setStorageType(NdbDictionary::Column::StorageTypeDefault);
9380  break;
9381  case HA_SM_MEMORY:
9382  tab.setStorageType(NdbDictionary::Column::StorageTypeMemory);
9383  break;
9384  }
9385 
9386  DBUG_PRINT("info", ("Table %s is %s stored with tablespace %s",
9387  m_tabname,
9388  (use_disk) ? "disk" : "memory",
9389  (use_disk) ? tab.getTablespaceName() : "N/A"));
9390 
9391  KEY* key_info;
9392  for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
9393  {
9394  KEY_PART_INFO *key_part= key_info->key_part;
9395  KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
9396  for (; key_part != end; key_part++)
9397  {
9398 #ifndef NDB_WITHOUT_COLUMN_FORMAT
9399  if (key_part->field->field_storage_type() == HA_SM_DISK)
9400  {
9401  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9402  ER_ILLEGAL_HA_CREATE_OPTION,
9403  ER(ER_ILLEGAL_HA_CREATE_OPTION),
9404  ndbcluster_hton_name,
9405  "Index on field "
9406  "declared with "
9407  "STORAGE DISK is not supported");
9408  result= HA_ERR_UNSUPPORTED;
9409  goto abort_return;
9410  }
9411 #endif
9412  tab.getColumn(key_part->fieldnr-1)->setStorageType(
9413  NdbDictionary::Column::StorageTypeMemory);
9414  }
9415  }
9416 
9417  // No primary key, create shadow key as 64 bit, auto increment
9418  if (form->s->primary_key == MAX_KEY)
9419  {
9420  DBUG_PRINT("info", ("Generating shadow key"));
9421  if (col.setName("$PK"))
9422  {
9423  my_errno= errno;
9424  goto abort;
 9425  }
 9426  col.setType(NDBCOL::Bigunsigned);
 9427  col.setLength(1);
9428  col.setNullable(FALSE);
9429  col.setPrimaryKey(TRUE);
9430  col.setAutoIncrement(TRUE);
9431  col.setDefaultValue(NULL, 0);
9432  if (tab.addColumn(col))
9433  {
9434  my_errno= errno;
9435  goto abort;
9436  }
9437  pk_length += 2;
9438  }
9439 
9440  // Make sure that blob tables don't have too big part size
9441  for (i= 0; i < form->s->fields; i++)
9442  {
9449  // To be upgrade/downgrade safe...we currently use
9450  // old NDB_MAX_TUPLE_SIZE_IN_WORDS, unless MAX_BLOB_PART_SIZE is set
9451  switch (form->field[i]->real_type()) {
9452  case MYSQL_TYPE_GEOMETRY:
9453  case MYSQL_TYPE_BLOB:
9454  case MYSQL_TYPE_MEDIUM_BLOB:
9455  case MYSQL_TYPE_LONG_BLOB:
9456  {
9457  NdbDictionary::Column * column= tab.getColumn(i);
9458  unsigned size= pk_length + (column->getPartSize()+3)/4 + 7;
9459  unsigned ndb_max= OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS;
9460  if (column->getPartSize() > (int)(4 * ndb_max))
9461  ndb_max= NDB_MAX_TUPLE_SIZE_IN_WORDS; // MAX_BLOB_PART_SIZE
9462 
9463  if (size > ndb_max &&
9464  (pk_length+7) < ndb_max)
9465  {
9466  size= ndb_max - pk_length - 7;
9467  column->setPartSize(4*size);
9468  }
9474  }
9475  default:
9476  break;
9477  }
9478  }
9479 
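The clamp above keeps primary-key words + blob-part words + 7 words of overhead within the tuple-size budget. A worked example with an assumed key size:

  const unsigned pk_words= 100;                  // assumed pk_length
  unsigned size= pk_words + (8000 + 3)/4 + 7;    // 2107 words > ndb_max (2013)
  size= 2013 - pk_words - 7;                     // cut back to 1906 words
  const unsigned new_part_bytes= 4 * size;       // 7624 -> setPartSize(7624)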
9480  // Check partition info
9481  if ((my_errno= set_up_partition_info(form->part_info, tab)))
9482  goto abort;
9483 
9484  if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
9485  tab.getDefaultNoPartitionsFlag() &&
9486  (create_info->max_rows != 0 || create_info->min_rows != 0))
9487  {
9488  ulonglong rows= create_info->max_rows >= create_info->min_rows ?
9489  create_info->max_rows :
9490  create_info->min_rows;
9491  uint no_fragments= get_no_fragments(rows);
9492  uint reported_frags= no_fragments;
9493  if (adjusted_frag_count(ndb, no_fragments, reported_frags))
9494  {
9495  push_warning(current_thd,
9496  Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
9497  "Ndb might have problems storing the max amount "
9498  "of rows specified");
9499  }
9500  tab.setFragmentCount(reported_frags);
9501  tab.setDefaultNoPartitionsFlag(false);
9502  tab.setFragmentData(0, 0);
9503  }
9504 
9505  // Check for HashMap
9506  if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
9507  tab.getDefaultNoPartitionsFlag())
9508  {
9509  tab.setFragmentCount(0);
9510  tab.setFragmentData(0, 0);
9511  }
9512  else if (tab.getFragmentType() == NDBTAB::HashMapPartition)
 9513  {
 9514  NdbDictionary::HashMap hm;
 9515  int res= dict->getDefaultHashMap(hm, tab.getFragmentCount());
9516  if (res == -1)
9517  {
9518  res= dict->initDefaultHashMap(hm, tab.getFragmentCount());
9519  if (res == -1)
9520  {
9521  const NdbError err= dict->getNdbError();
9522  my_errno= ndb_to_mysql_error(&err);
9523  goto abort;
9524  }
9525 
9526  res= dict->createHashMap(hm);
9527  if (res == -1)
9528  {
9529  const NdbError err= dict->getNdbError();
9530  my_errno= ndb_to_mysql_error(&err);
9531  goto abort;
9532  }
9533  }
9534  }
9535 
9536  // Create the table in NDB
9537  if (dict->createTable(tab, &objId) != 0)
9538  {
9539  const NdbError err= dict->getNdbError();
9540  my_errno= ndb_to_mysql_error(&err);
9541  goto abort;
9542  }
9543 
9544  DBUG_PRINT("info", ("Table %s/%s created successfully",
9545  m_dbname, m_tabname));
9546 
9547  // Create secondary indexes
9548  tab.assignObjId(objId);
9549  m_table= &tab;
9550  my_errno= create_indexes(thd, ndb, form);
9551  m_table= 0;
9552 
9553  if (!my_errno)
9554  {
9555  /*
9556  * All steps have succeeded, try and commit schema transaction
9557  */
9558  if (dict->endSchemaTrans() == -1)
9559  goto err_return;
9560  my_errno= write_ndb_file(name);
9561  }
9562  else
9563  {
9564 abort:
9565 /*
9566  * Some step during table creation failed, abort schema transaction
9567  */
9568  DBUG_PRINT("info", ("Aborting schema transaction due to error %i",
9569  my_errno));
9570  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9571  == -1)
9572  DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9573  dict->getNdbError().code));
9574  m_table= 0;
9575  DBUG_RETURN(my_errno);
9576 abort_return:
9577  DBUG_PRINT("info", ("Aborting schema transaction"));
9578  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9579  == -1)
9580  DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9581  dict->getNdbError().code));
9582  DBUG_RETURN(result);
9583 err_return:
9584  m_table= 0;
9585  ERR_RETURN(dict->getNdbError());
9586  }
9587 
9591  Ndb_table_guard ndbtab_g(dict, m_tabname);
9592  m_table= ndbtab_g.get_table();
9593 
9594  if (my_errno)
9595  {
 9596  /*
 9597  Failed to create an index,
 9598  drop the table (and all its indexes)
 9599  */
9600  while (!thd->killed)
9601  {
9602  if (dict->beginSchemaTrans() == -1)
9603  goto cleanup_failed;
9604  if (dict->dropTableGlobal(*m_table))
9605  {
9606  switch (dict->getNdbError().status)
 9607  {
 9608  case NdbError::TemporaryError:
 9609  if (!thd->killed)
9610  {
9611  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9612  == -1)
9613  DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9614  dict->getNdbError().code));
9615  goto cleanup_failed;
9616  }
9617  break;
9618  default:
9619  break;
9620  }
9621  }
9622  if (dict->endSchemaTrans() == -1)
9623  {
9624 cleanup_failed:
9625  DBUG_PRINT("info", ("Could not cleanup failed create %i",
9626  dict->getNdbError().code));
 9627  continue; // retry indefinitely
9628  }
9629  break;
9630  }
9631  m_table = 0;
9632  DBUG_RETURN(my_errno);
9633  }
9634  else // if (!my_errno)
9635  {
9636  NDB_SHARE *share= 0;
9637  pthread_mutex_lock(&ndbcluster_mutex);
9638  /*
9639  First make sure we get a "fresh" share here, not an old trailing one...
9640  */
9641  {
9642  uint length= (uint) strlen(name);
9643  if ((share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
9644  (const uchar*) name, length)))
9645  handle_trailing_share(thd, share);
9646  }
9647  /*
9648  get a new share
9649  */
9650 
9651  /* ndb_share reference create */
9652  if (!(share= get_share(name, form, TRUE, TRUE)))
9653  {
9654  sql_print_error("NDB: allocating table share for %s failed", name);
9655  /* my_errno is set */
9656  }
9657  else
9658  {
9659  DBUG_PRINT("NDB_SHARE", ("%s binlog create use_count: %u",
9660  share->key, share->use_count));
9661  }
9662  pthread_mutex_unlock(&ndbcluster_mutex);
9663 
9664  while (!IS_TMP_PREFIX(m_tabname))
9665  {
9666 #ifdef HAVE_NDB_BINLOG
9667  if (share)
9668  {
9669  /* Set the Binlogging information we retrieved above */
9670  ndbcluster_apply_binlog_replication_info(thd,
9671  share,
9672  m_table,
9673  form,
9674  conflict_fn,
9675  args,
9676  num_args,
9677  TRUE, /* Do set binlog flags */
9678  binlog_flags);
9679  }
9680 #endif
9681  String event_name(INJECTOR_EVENT_LEN);
9682  ndb_rep_event_name(&event_name, m_dbname, m_tabname,
9683  get_binlog_full(share));
9684  int do_event_op= ndb_binlog_running;
9685 
9686  if (!ndb_schema_share &&
9687  strcmp(share->db, NDB_REP_DB) == 0 &&
9688  strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
9689  do_event_op= 1;
9690 
9691  /*
9692  Always create an event for the table, as other mysql servers
9693  expect it to be there.
9694  */
9695  if (!ndbcluster_create_event(thd, ndb, m_table, event_name.c_ptr(), share,
9696  share && do_event_op ? 2 : 1/* push warning */))
9697  {
9698  if (opt_ndb_extra_logging)
9699  sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
9700  event_name.c_ptr());
9701  if (share &&
9702  ndbcluster_create_event_ops(thd, share,
9703  m_table, event_name.c_ptr()))
9704  {
9705  sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
9706  " Event: %s", name);
9707  /* a warning has been issued to the client */
9708  }
9709  }
9710  /*
9711  warning has been issued if ndbcluster_create_event failed
9712  and (share && do_event_op)
9713  */
9714  if (share && !do_event_op)
9715  set_binlog_nologging(share);
9716  ndbcluster_log_schema_op(thd,
9717  thd->query(), thd->query_length(),
9718  share->db, share->table_name,
9719  m_table->getObjectId(),
9720  m_table->getObjectVersion(),
9721  (is_truncate) ?
9722  SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE,
9723  NULL, NULL);
9724  break;
9725  }
9726  }
9727 
9728  m_table= 0;
9729  DBUG_RETURN(my_errno);
9730 }
9731 
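All DDL in create() is bracketed by a single NDB schema transaction, so a failure in any step aborts the whole table definition atomically. A minimal sketch of that bracket, assuming dict and tab as above (error handling condensed):

  if (dict->beginSchemaTrans() == -1)
    return -1;                                  // could not even start
  int res= dict->createTable(tab);              // plus createIndex() calls etc.
  if (res == 0)
    res= dict->endSchemaTrans();                // commit the DDL atomically
  else
    (void)dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort);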
9732 
9733 int ha_ndbcluster::create_index(THD *thd, const char *name, KEY *key_info,
9734  NDB_INDEX_TYPE idx_type, uint idx_no)
9735 {
9736  int error= 0;
9737  char unique_name[FN_LEN + 1];
9738  static const char* unique_suffix= "$unique";
 9739  DBUG_ENTER("ha_ndbcluster::create_index");
9740  DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));
9741 
9742  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
9743  {
9744  strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
9745  DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
9746  unique_name, idx_no));
9747  }
9748 
9749  switch (idx_type){
9750  case PRIMARY_KEY_INDEX:
9751  // Do nothing, already created
9752  break;
9753  case PRIMARY_KEY_ORDERED_INDEX:
9754  error= create_ordered_index(thd, name, key_info);
9755  break;
9756  case UNIQUE_ORDERED_INDEX:
9757  if (!(error= create_ordered_index(thd, name, key_info)))
9758  error= create_unique_index(thd, unique_name, key_info);
9759  break;
9760  case UNIQUE_INDEX:
9761  if (check_index_fields_not_null(key_info))
9762  {
9763  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9764  ER_NULL_COLUMN_IN_INDEX,
9765  "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan");
9766  }
9767  error= create_unique_index(thd, unique_name, key_info);
9768  break;
9769  case ORDERED_INDEX:
9770  if (key_info->algorithm == HA_KEY_ALG_HASH)
9771  {
9772  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9773  ER_ILLEGAL_HA_CREATE_OPTION,
9774  ER(ER_ILLEGAL_HA_CREATE_OPTION),
9775  ndbcluster_hton_name,
9776  "Ndb does not support non-unique "
9777  "hash based indexes");
9778  error= HA_ERR_UNSUPPORTED;
9779  break;
9780  }
9781  error= create_ordered_index(thd, name, key_info);
9782  break;
9783  default:
9784  DBUG_ASSERT(FALSE);
9785  break;
9786  }
9787 
9788  DBUG_RETURN(error);
9789 }
9790 
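For the unique index types, create_index() above derives the NDB-side name by appending "$unique", so one MySQL UNIQUE key may map to two NDB indexes (an ordered index plus a unique hash index). A small naming sketch using strxnmov() as in the code above ("idx_a" is an invented name):

  char unique_name[FN_LEN + 1];
  strxnmov(unique_name, FN_LEN, "idx_a", "$unique", NullS);
  // ordered index     : "idx_a"         (create_ordered_index)
  // unique hash index : "idx_a$unique"  (create_unique_index)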
9791 int ha_ndbcluster::create_ordered_index(THD *thd, const char *name,
9792  KEY *key_info)
9793 {
9794  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
9795  DBUG_RETURN(create_ndb_index(thd, name, key_info, FALSE));
9796 }
9797 
9798 int ha_ndbcluster::create_unique_index(THD *thd, const char *name,
9799  KEY *key_info)
9800 {
9801 
9802  DBUG_ENTER("ha_ndbcluster::create_unique_index");
9803  DBUG_RETURN(create_ndb_index(thd, name, key_info, TRUE));
9804 }
9805 
9806 
9814 int ha_ndbcluster::create_ndb_index(THD *thd, const char *name,
9815  KEY *key_info,
9816  bool unique)
9817 {
9818  char index_name[FN_LEN + 1];
9819  Ndb *ndb= get_ndb(thd);
9821  KEY_PART_INFO *key_part= key_info->key_part;
9822  KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
9823 
 9824  DBUG_ENTER("ha_ndbcluster::create_ndb_index");
9825  DBUG_PRINT("enter", ("name: %s ", name));
9826 
9827  ndb_protect_char(name, index_name, sizeof(index_name) - 1, '/');
9828  DBUG_PRINT("info", ("index name: %s ", index_name));
9829 
9830  NdbDictionary::Index ndb_index(index_name);
9831  if (unique)
9832  ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
9833  else
9834  {
9835  ndb_index.setType(NdbDictionary::Index::OrderedIndex);
9836  // TODO Only temporary ordered indexes supported
9837  ndb_index.setLogging(FALSE);
9838  }
9839  if (!m_table->getLogging())
9840  ndb_index.setLogging(FALSE);
9841  if (((NDBTAB*)m_table)->getTemporary())
9842  ndb_index.setTemporary(TRUE);
9843  if (ndb_index.setTable(m_tabname))
9844  {
9845  DBUG_RETURN(my_errno= errno);
9846  }
9847 
9848  for (; key_part != end; key_part++)
9849  {
9850  Field *field= key_part->field;
9851 #ifndef NDB_WITHOUT_COLUMN_FORMAT
9852  if (field->field_storage_type() == HA_SM_DISK)
9853  {
9854  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9855  ER_ILLEGAL_HA_CREATE_OPTION,
9856  ER(ER_ILLEGAL_HA_CREATE_OPTION),
9857  ndbcluster_hton_name,
9858  "Index on field "
9859  "declared with "
9860  "STORAGE DISK is not supported");
9861  DBUG_RETURN(HA_ERR_UNSUPPORTED);
9862  }
9863 #endif
9864  DBUG_PRINT("info", ("attr: %s", field->field_name));
9865  if (ndb_index.addColumnName(field->field_name))
9866  {
9867  DBUG_RETURN(my_errno= errno);
9868  }
9869  }
9870 
9871  if (dict->createIndex(ndb_index, *m_table))
9872  ERR_RETURN(dict->getNdbError());
9873 
9874  // Success
9875  DBUG_PRINT("info", ("Created index %s", name));
9876  DBUG_RETURN(0);
9877 }
9878 
9879 int ha_ndbcluster::add_index_impl(THD *thd, TABLE *table_arg,
9880  KEY *key_info, uint num_of_keys)
9881 {
9882  int error= 0;
9883  uint idx;
9884  DBUG_ENTER("ha_ndbcluster::add_index");
9885  DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str));
9886  DBUG_ASSERT(m_share->state == NSS_ALTERED);
9887 
9888  for (idx= 0; idx < num_of_keys; idx++)
9889  {
9890  KEY *key= key_info + idx;
9891  KEY_PART_INFO *key_part= key->key_part;
9892  KEY_PART_INFO *end= key_part + key->user_defined_key_parts;
9893  NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false);
9894  DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
9895  // Add fields to key_part struct
9896  for (; key_part != end; key_part++)
9897  key_part->field= table->field[key_part->fieldnr];
9898  // Check index type
9899  // Create index in ndb
9900  if((error= create_index(thd, key_info[idx].name, key, idx_type, idx)))
9901  break;
9902  }
9903  DBUG_RETURN(error);
9904 }
9905 
9910 int ha_ndbcluster::rename_table(const char *from, const char *to)
9911 {
9912  THD *thd= current_thd;
9913  NDBDICT *dict;
9914  char old_dbname[FN_HEADLEN];
9915  char new_dbname[FN_HEADLEN];
9916  char new_tabname[FN_HEADLEN];
9917  const NDBTAB *orig_tab;
9918  int result;
9919  bool recreate_indexes= FALSE;
9920  NDBDICT::List index_list;
9921 
9922  DBUG_ENTER("ha_ndbcluster::rename_table");
9923  DBUG_PRINT("info", ("Renaming %s to %s", from, to));
9924 
9925  if (thd == injector_thd)
9926  {
9927  /*
9928  Table was renamed remotely and is already
9929  renamed inside ndb.
9930  Just rename the .ndb file.
9931  */
9932  DBUG_RETURN(handler::rename_table(from, to));
9933  }
9934 
9935  set_dbname(from, old_dbname);
9936  set_dbname(to, new_dbname);
9937  set_tabname(from);
9938  set_tabname(to, new_tabname);
9939 
9940  if (check_ndb_connection(thd))
9941  DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION);
9942 
9943  Thd_ndb *thd_ndb= thd_get_thd_ndb(thd);
9944  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
9945  DBUG_RETURN(HA_ERR_NO_CONNECTION);
9946 
9947  Ndb *ndb= get_ndb(thd);
9948  ndb->setDatabaseName(old_dbname);
9949  dict= ndb->getDictionary();
9950  Ndb_table_guard ndbtab_g(dict, m_tabname);
9951  if (!(orig_tab= ndbtab_g.get_table()))
9952  ERR_RETURN(dict->getNdbError());
9953 
9954  if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
9955  {
9956  dict->listIndexes(index_list, *orig_tab);
9957  recreate_indexes= TRUE;
9958  }
9959  // Change current database to that of target table
9960  set_dbname(to);
9961  if (ndb->setDatabaseName(m_dbname))
9962  {
9963  ERR_RETURN(ndb->getNdbError());
9964  }
9965 
9966  int ndb_table_id= orig_tab->getObjectId();
9967  int ndb_table_version= orig_tab->getObjectVersion();
9968  /* ndb_share reference temporary */
9969  NDB_SHARE *share= get_share(from, 0, FALSE);
9970  int is_old_table_tmpfile= IS_TMP_PREFIX(m_tabname);
9971  int is_new_table_tmpfile= IS_TMP_PREFIX(new_tabname);
9972  if (!is_new_table_tmpfile && !is_old_table_tmpfile)
9973  {
9974  /*
9975  this is a "real" rename table, i.e. not tied to an offline alter table
9976  - send new name == "to" in query field
9977  */
9978  ndbcluster_log_schema_op(thd, to, strlen(to),
9979  old_dbname, m_tabname,
9980  ndb_table_id, ndb_table_version,
9981  SOT_RENAME_TABLE_PREPARE,
9982  m_dbname, new_tabname);
9983  }
9984  if (share)
9985  {
9986  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
9987  share->key, share->use_count));
9988  ndbcluster_prepare_rename_share(share, to);
9989  int ret = ndbcluster_rename_share(thd, share);
9990  assert(ret == 0); NDB_IGNORE_VALUE(ret);
9991  }
9992 
9993  NdbDictionary::Table new_tab= *orig_tab;
9994  new_tab.setName(new_tabname);
9995  if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
9996  {
9997  NdbError ndb_error= dict->getNdbError();
9998  if (share)
9999  {
10000  int ret = ndbcluster_undo_rename_share(thd, share);
10001  assert(ret == 0); NDB_IGNORE_VALUE(ret);
10002  /* ndb_share reference temporary free */
10003  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
10004  share->key, share->use_count));
10005  free_share(&share);
10006  }
10007  ERR_RETURN(ndb_error);
10008  }
10009 
10010  // Rename .ndb file
10011  if ((result= handler::rename_table(from, to)))
10012  {
10013  // ToDo in 4.1 should rollback alter table...
10014  if (share)
10015  {
10016  /* ndb_share reference temporary free */
10017  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
10018  share->key, share->use_count));
10019  free_share(&share);
10020  }
10021  DBUG_RETURN(result);
10022  }
10023 
10024  /* handle old table */
10025  if (!is_old_table_tmpfile)
10026  {
10027  ndbcluster_drop_event(thd, ndb, share, "rename table",
10028  old_dbname, m_tabname);
10029  }
10030 
10031  if (!result && !is_new_table_tmpfile)
10032  {
10033  Ndb_table_guard ndbtab_g2(dict, new_tabname);
10034  const NDBTAB *ndbtab= ndbtab_g2.get_table();
10035 #ifdef HAVE_NDB_BINLOG
10036  if (share)
10037  ndbcluster_read_binlog_replication(thd, ndb, share, ndbtab,
10038  ::server_id, NULL, TRUE);
10039 #endif
10040  /* always create an event for the table */
10041  String event_name(INJECTOR_EVENT_LEN);
10042  ndb_rep_event_name(&event_name, new_dbname, new_tabname,
10043  get_binlog_full(share));
10044 
10045  if (!Ndb_dist_priv_util::is_distributed_priv_table(new_dbname,
10046  new_tabname) &&
10047  !ndbcluster_create_event(thd, ndb, ndbtab, event_name.c_ptr(), share,
10048  share && ndb_binlog_running ? 2 : 1/* push warning */))
10049  {
10050  if (opt_ndb_extra_logging)
10051  sql_print_information("NDB Binlog: RENAME Event: %s",
10052  event_name.c_ptr());
10053  if (share && (share->op == 0) &&
10054  ndbcluster_create_event_ops(thd, share, ndbtab, event_name.c_ptr()))
10055  {
10056  sql_print_error("NDB Binlog: FAILED create event operations "
10057  "during RENAME. Event %s", event_name.c_ptr());
10058  /* a warning has been issued to the client */
10059  }
10060  }
10061  /*
10062  warning has been issued if ndbcluster_create_event failed
10063  and (share && ndb_binlog_running)
10064  */
10065  if (!is_old_table_tmpfile)
10066  {
10067  /* "real" rename table */
10068  ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
10069  old_dbname, m_tabname,
10070  ndb_table_id, ndb_table_version,
10071  SOT_RENAME_TABLE,
10072  m_dbname, new_tabname);
10073  }
10074  else
10075  {
10076  /* final phase of offline alter table */
10077  ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
10078  m_dbname, new_tabname,
10079  ndb_table_id, ndb_table_version,
10080  SOT_ALTER_TABLE_COMMIT,
10081  NULL, NULL);
10082 
10083  }
10084  }
10085 
10086  // If we are moving tables between databases, we need to recreate
10087  // indexes
10088  if (recreate_indexes)
10089  {
10090  for (unsigned i = 0; i < index_list.count; i++)
10091  {
10092  NDBDICT::List::Element& index_el = index_list.elements[i];
10093  // Recreate any indexes not stored in the system database
10094  if (my_strcasecmp(system_charset_info,
10095  index_el.database, NDB_SYSTEM_DATABASE))
10096  {
10097  set_dbname(from);
10098  ndb->setDatabaseName(m_dbname);
10099  const NDBINDEX * index= dict->getIndexGlobal(index_el.name, new_tab);
10100  DBUG_PRINT("info", ("Creating index %s/%s",
10101  index_el.database, index->getName()));
10102  dict->createIndex(*index, new_tab);
10103  DBUG_PRINT("info", ("Dropping index %s/%s",
10104  index_el.database, index->getName()));
10105  set_dbname(from);
10106  ndb->setDatabaseName(m_dbname);
10107  dict->dropIndexGlobal(*index);
10108  }
10109  }
10110  }
10111  if (share)
10112  {
10113  /* ndb_share reference temporary free */
10114  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
10115  share->key, share->use_count));
10116  free_share(&share);
10117  }
10118 
10119  DBUG_RETURN(result);
10120 }
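/*
  Illustrative sketch of the rename primitive used above, reduced to its
  NDB API core (hypothetical names "t1"/"t2"; `ndb` is assumed connected
  with the source database already selected): copy the table definition,
  change the name, and let alterTableGlobal() perform the rename in the
  cluster dictionary.
*/
static int rename_demo(Ndb *ndb)
{
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
  const NdbDictionary::Table *orig= dict->getTable("t1");
  if (orig == NULL)
    return -1;
  NdbDictionary::Table new_tab= *orig;       // value copy of the definition
  new_tab.setName("t2");
  if (dict->alterTableGlobal(*orig, new_tab) != 0)
    return -1;                               // inspect dict->getNdbError()
  return 0;
}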
10121 
10122 
10127 static
10128 void
10129 delete_table_drop_share(NDB_SHARE* share, const char * path)
10130 {
10131  if (share)
10132  {
10133  pthread_mutex_lock(&ndbcluster_mutex);
10134 do_drop:
10135  if (share->state != NSS_DROPPED)
10136  {
10137  /*
10138  The share kept by the server has not been freed, free it
10139  */
10140  share->state= NSS_DROPPED;
10141  /* ndb_share reference create free */
10142  DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u",
10143  share->key, share->use_count));
10144  free_share(&share, TRUE);
10145  }
10146  /* ndb_share reference temporary free */
10147  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
10148  share->key, share->use_count));
10149  free_share(&share, TRUE);
10150  pthread_mutex_unlock(&ndbcluster_mutex);
10151  }
10152  else if (path)
10153  {
10154  pthread_mutex_lock(&ndbcluster_mutex);
10155  share= get_share(path, 0, FALSE, TRUE);
10156  if (share)
10157  {
10158  goto do_drop;
10159  }
10160  pthread_mutex_unlock(&ndbcluster_mutex);
10161  }
10162 }
10163 
10164 /* static version which does not need a handler */
10165 
10166 int
10167 ha_ndbcluster::drop_table_impl(THD *thd, ha_ndbcluster *h, Ndb *ndb,
10168  const char *path,
10169  const char *db,
10170  const char *table_name)
10171 {
10172  DBUG_ENTER("ha_ndbcluster::drop_table_impl");
10173  NDBDICT *dict= ndb->getDictionary();
10174  int ndb_table_id= 0;
10175  int ndb_table_version= 0;
10176  /*
10177  Don't allow drop table unless
10178  schema distribution table is setup
10179  */
10180  if (!ndb_schema_share)
10181  {
10182  DBUG_PRINT("info", ("Schema distribution table not setup"));
10183  DBUG_RETURN(HA_ERR_NO_CONNECTION);
10184  }
10185  /* ndb_share reference temporary */
10186  NDB_SHARE *share= get_share(path, 0, FALSE);
10187  if (share)
10188  {
10189  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
10190  share->key, share->use_count));
10191  }
10192 
10193  /* Drop the table from NDB */
10194 
10195  int res= 0;
10196  if (h && h->m_table)
10197  {
10198 retry_temporary_error1:
10199  if (dict->dropTableGlobal(*h->m_table) == 0)
10200  {
10201  ndb_table_id= h->m_table->getObjectId();
10202  ndb_table_version= h->m_table->getObjectVersion();
10203  DBUG_PRINT("info", ("success 1"));
10204  }
10205  else
10206  {
10207  switch (dict->getNdbError().status)
10208  {
10209  case NdbError::TemporaryError:
10210  if (!thd->killed)
10211  goto retry_temporary_error1; // retry indefinitely
10212  break;
10213  default:
10214  break;
10215  }
10216  res= ndb_to_mysql_error(&dict->getNdbError());
10217  DBUG_PRINT("info", ("error(1) %u", res));
10218  }
10219  h->release_metadata(thd, ndb);
10220  }
10221  else
10222  {
10223  ndb->setDatabaseName(db);
10224  while (1)
10225  {
10226  Ndb_table_guard ndbtab_g(dict, table_name);
10227  if (ndbtab_g.get_table())
10228  {
10229  retry_temporary_error2:
10230  if (dict->dropTableGlobal(*ndbtab_g.get_table()) == 0)
10231  {
10232  ndb_table_id= ndbtab_g.get_table()->getObjectId();
10233  ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
10234  DBUG_PRINT("info", ("success 2"));
10235  break;
10236  }
10237  else
10238  {
10239  switch (dict->getNdbError().status)
10240  {
10241  case NdbError::TemporaryError:
10242  if (!thd->killed)
10243  goto retry_temporary_error2; // retry indefinitely
10244  break;
10245  default:
10246  if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
10247  {
10248  ndbtab_g.invalidate();
10249  continue;
10250  }
10251  break;
10252  }
10253  }
10254  }
10255  res= ndb_to_mysql_error(&dict->getNdbError());
10256  DBUG_PRINT("info", ("error(2) %u", res));
10257  break;
10258  }
10259  }
10260 
10261  if (res)
10262  {
10263  /* the drop table failed for some reason, drop the share anyway */
10264  delete_table_drop_share(share, 0);
10265  DBUG_RETURN(res);
10266  }
10267 
10268  /* stop the logging of the dropped table, and cleanup */
10269 
10270  /*
10271  drop table is successful even if the table does not exist in ndb.
10272  In case the table was actually not dropped, there is no need
10273  to force a gcp, and setting the event_name to null will indicate
10274  that there is no event to be dropped
10275  */
10276  int table_dropped= dict->getNdbError().code != 709;
10277 
10278  {
10279  if (table_dropped)
10280  {
10281  ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
10282  db, table_name);
10283  }
10284  else
10285  {
10289  ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
10290  0, 0);
10291  }
10292  }
10293 
10294  if (!IS_TMP_PREFIX(table_name) && share &&
10295  thd->lex->sql_command != SQLCOM_TRUNCATE)
10296  {
10297  ndbcluster_log_schema_op(thd,
10298  thd->query(), thd->query_length(),
10299  share->db, share->table_name,
10300  ndb_table_id, ndb_table_version,
10301  SOT_DROP_TABLE, NULL, NULL);
10302  }
10303 
10304  delete_table_drop_share(share, 0);
10305  DBUG_RETURN(0);
10306 }
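/*
  Condensed sketch of the retry policy implemented by the labels
  retry_temporary_error1/2 above: temporary NDB errors are retried until
  the client thread is killed, anything else fails immediately.
*/
static int drop_with_retry(THD *thd, NdbDictionary::Dictionary *dict,
                           const NdbDictionary::Table &tab)
{
  for (;;)
  {
    if (dict->dropTableGlobal(tab) == 0)
      return 0;                                      // dropped
    if (dict->getNdbError().status == NdbError::TemporaryError &&
        !thd->killed)
      continue;                                      // retry indefinitely
    return ndb_to_mysql_error(&dict->getNdbError()); // permanent failure
  }
}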
10307 
10308 int ha_ndbcluster::delete_table(const char *name)
10309 {
10310  THD *thd= current_thd;
10311  Thd_ndb *thd_ndb= get_thd_ndb(thd);
10312  Ndb *ndb;
10313  int error= 0;
10314  DBUG_ENTER("ha_ndbcluster::delete_table");
10315  DBUG_PRINT("enter", ("name: %s", name));
10316 
10317  if ((thd == injector_thd) ||
10318  (thd_ndb->options & TNO_NO_NDB_DROP_TABLE))
10319  {
10320  /*
10321  Table was dropped remotely and is already
10322  dropped inside ndb.
10323  Just drop local files.
10324  */
10325  delete_table_drop_share(0, name);
10326  DBUG_RETURN(handler::delete_table(name));
10327  }
10328 
10329  set_dbname(name);
10330  set_tabname(name);
10331 
10332  /*
10333  Don't allow drop table unless
10334  schema distribution table is setup
10335  */
10336  if (!ndb_schema_share)
10337  {
10338  DBUG_PRINT("info", ("Schema distribution table not setup"));
10339  error= HA_ERR_NO_CONNECTION;
10340  goto err;
10341  }
10342 
10343  if (check_ndb_connection(thd))
10344  {
10345  error= HA_ERR_NO_CONNECTION;
10346  goto err;
10347  }
10348 
10349  ndb= thd_ndb->ndb;
10350 
10351  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::delete_table"))
10352  {
10353  error= HA_ERR_NO_CONNECTION;
10354  goto err;
10355  }
10356 
10357  /*
10358  Drop table in ndb.
10359  If it was already gone it might have been dropped
10360  remotely, give a warning and then drop .ndb file.
10361  */
10362  if (!(error= drop_table_impl(thd, this, ndb, name,
10363  m_dbname, m_tabname)) ||
10364  error == HA_ERR_NO_SUCH_TABLE)
10365  {
10366  /* Call ancestor function to delete .ndb file */
10367  int error1= handler::delete_table(name);
10368  if (!error)
10369  error= error1;
10370  }
10371 
10372 err:
10373  DBUG_RETURN(error);
10374 }
10375 
10376 
10377 void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
10378  ulonglong nb_desired_values,
10379  ulonglong *first_value,
10380  ulonglong *nb_reserved_values)
10381 {
10382  Uint64 auto_value;
10383  THD *thd= current_thd;
10384  DBUG_ENTER("get_auto_increment");
10385  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
10386  Ndb *ndb= get_ndb(table->in_use);
10387  uint retries= NDB_AUTO_INCREMENT_RETRIES;
10388  int retry_sleep= 30; /* 30 milliseconds, transaction */
10389  for (;;)
10390  {
10391  Ndb_tuple_id_range_guard g(m_share);
10392  if ((m_skip_auto_increment &&
10393  ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
10394  ndb->getAutoIncrementValue(m_table, g.range, auto_value,
10395  Uint32(m_autoincrement_prefetch),
10396  increment, offset))
10397  {
10398  if (--retries && !thd->killed &&
10399  ndb->getNdbError().status == NdbError::TemporaryError)
10400  {
10401  do_retry_sleep(retry_sleep);
10402  continue;
10403  }
10404  const NdbError err= ndb->getNdbError();
10405  sql_print_error("Error %lu in ::get_auto_increment(): %s",
10406  (ulong) err.code, err.message);
10407  *first_value= ~(ulonglong) 0;
10408  DBUG_VOID_RETURN;
10409  }
10410  break;
10411  }
10412  *first_value= (longlong)auto_value;
10413  /* From the point of view of MySQL, NDB reserves one row at a time */
10414  *nb_reserved_values= 1;
10415  DBUG_VOID_RETURN;
10416 }
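/*
  Sketch of the arithmetic behind the offset/increment parameters above:
  the server expects returned values to lie on the sequence
  offset + k * increment. Given a raw value reserved from NDB, the next
  conforming value can be computed as follows (illustrative helper, not
  used by the handler; increment is assumed non-zero):
*/
static ulonglong next_in_sequence(ulonglong reserved,
                                  ulonglong offset, ulonglong increment)
{
  if (reserved <= offset)
    return offset;
  // round up to the next value congruent to offset modulo increment
  ulonglong k= (reserved - offset + increment - 1) / increment;
  return offset + k * increment;
}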
10417 
10418 
10423 ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
10424  handler(hton, table_arg),
10425  m_thd_ndb(NULL),
10426  m_active_cursor(NULL),
10427  m_table(NULL),
10428  m_ndb_record(0),
10429  m_ndb_hidden_key_record(0),
10430  m_table_info(NULL),
10431  m_share(0),
10432  m_key_fields(NULL),
10433  m_part_info(NULL),
10434  m_user_defined_partitioning(FALSE),
10435  m_use_partition_pruning(FALSE),
10436  m_sorted(FALSE),
10437  m_use_write(FALSE),
10438  m_ignore_dup_key(FALSE),
10439  m_has_unique_index(FALSE),
10440  m_ignore_no_key(FALSE),
10441  m_read_before_write_removal_possible(FALSE),
10442  m_read_before_write_removal_used(FALSE),
10443  m_rows_updated(0),
10444  m_rows_deleted(0),
10445  m_rows_to_insert((ha_rows) 1),
10446  m_rows_inserted((ha_rows) 0),
10447  m_rows_changed((ha_rows) 0),
10448  m_delete_cannot_batch(FALSE),
10449  m_update_cannot_batch(FALSE),
10450  m_skip_auto_increment(TRUE),
10451  m_blobs_pending(0),
10452  m_is_bulk_delete(false),
10453  m_blobs_row_total_size(0),
10454  m_blobs_buffer(0),
10455  m_blobs_buffer_size(0),
10456  m_dupkey((uint) -1),
10457  m_autoincrement_prefetch(DEFAULT_AUTO_PREFETCH),
10458  m_pushed_join_member(NULL),
10459  m_pushed_join_operation(-1),
10460  m_disable_pushed_join(FALSE),
10461  m_active_query(NULL),
10462  m_pushed_operation(NULL),
10463  m_cond(NULL),
10464  m_multi_cursor(NULL)
10465 {
10466  int i;
10467 
10468  DBUG_ENTER("ha_ndbcluster");
10469 
10470  m_tabname[0]= '\0';
10471  m_dbname[0]= '\0';
10472 
10473  stats.records= ~(ha_rows)0; // uninitialized
10474  stats.block_size= 1024;
10475 
10476  for (i= 0; i < MAX_KEY; i++)
10477  ndb_init_index(m_index[i]);
10478 
10479  DBUG_VOID_RETURN;
10480 }
10481 
10482 
10487 ha_ndbcluster::~ha_ndbcluster()
10488 {
10489  THD *thd= current_thd;
10490  Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
10491  DBUG_ENTER("~ha_ndbcluster");
10492 
10493  if (m_share)
10494  {
10495  /* ndb_share reference handler free */
10496  DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u",
10497  m_share->key, m_share->use_count));
10498  free_share(&m_share);
10499  }
10500  release_metadata(thd, ndb);
10501  release_blobs_buffer();
10502 
10503  // Check for open cursor/transaction
10504  DBUG_ASSERT(m_thd_ndb == NULL);
10505 
10506  // Discard any generated condition
10507  DBUG_PRINT("info", ("Deleting generated condition"));
10508  if (m_cond)
10509  {
10510  delete m_cond;
10511  m_cond= NULL;
10512  }
10513  DBUG_PRINT("info", ("Deleting pushed joins"));
10514 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
10515  DBUG_ASSERT(m_active_query == NULL);
10516  DBUG_ASSERT(m_active_cursor == NULL);
10517  if (m_pushed_join_operation==PUSHED_ROOT)
10518  {
10519  delete m_pushed_join_member; // Also delete QueryDef
10520  }
10521  m_pushed_join_member= NULL;
10522 #endif
10523  DBUG_VOID_RETURN;
10524 }
10525 
10526 
10538 int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
10539 {
10540  THD *thd= current_thd;
10541  int res;
10542  KEY *key;
10543  KEY_PART_INFO *key_part_info;
10544  uint key_parts, i, j;
10545  DBUG_ENTER("ha_ndbcluster::open");
10546  DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d",
10547  name, mode, test_if_locked));
10548 
10549  if (table_share->primary_key != MAX_KEY)
10550  {
10551  /*
10552  Setup ref_length to make room for the whole
10553  primary key to be written in the ref variable
10554  */
10555  key= table->key_info+table_share->primary_key;
10556  ref_length= key->key_length;
10557  }
10558  else
10559  {
10560  if (m_user_defined_partitioning)
10561  {
10562  /* Add space for partid in ref */
10563  ref_length+= sizeof(m_part_id);
10564  }
10565  }
10566  DBUG_PRINT("info", ("ref_length: %d", ref_length));
10567 
10568  {
10569  char* bitmap_array;
10570  uint extra_hidden_keys= table_share->primary_key != MAX_KEY ? 0 : 1;
10571  uint n_keys= table_share->keys + extra_hidden_keys;
10572  uint ptr_size= sizeof(MY_BITMAP*) * (n_keys + 1 /* null termination */);
10573  uint map_size= sizeof(MY_BITMAP) * n_keys;
10574  m_key_fields= (MY_BITMAP**)my_malloc(ptr_size + map_size,
10575  MYF(MY_WME + MY_ZEROFILL));
10576  if (!m_key_fields)
10577  {
10578  local_close(thd, FALSE);
10579  DBUG_RETURN(1);
10580  }
10581  bitmap_array= ((char*)m_key_fields) + ptr_size;
10582  for (i= 0; i < n_keys; i++)
10583  {
10584  my_bitmap_map *bitbuf= NULL;
10585  bool is_hidden_key= (i == table_share->keys);
10586  m_key_fields[i]= (MY_BITMAP*)bitmap_array;
10587  if (is_hidden_key || (i == table_share->primary_key))
10588  {
10589  m_pk_bitmap_p= m_key_fields[i];
10590  bitbuf= m_pk_bitmap_buf;
10591  }
10592  if (bitmap_init(m_key_fields[i], bitbuf,
10593  table_share->fields, FALSE))
10594  {
10595  m_key_fields[i]= NULL;
10596  local_close(thd, FALSE);
10597  DBUG_RETURN(1);
10598  }
10599  if (!is_hidden_key)
10600  {
10601  key= table->key_info + i;
10602  key_part_info= key->key_part;
10603  key_parts= key->user_defined_key_parts;
10604  for (j= 0; j < key_parts; j++, key_part_info++)
10605  bitmap_set_bit(m_key_fields[i], key_part_info->fieldnr-1);
10606  }
10607  else
10608  {
10609  uint field_no= table_share->fields;
10610  ((uchar *)m_pk_bitmap_buf)[field_no>>3]|= (1 << (field_no & 7));
10611  }
10612  bitmap_array+= sizeof(MY_BITMAP);
10613  }
10614  m_key_fields[i]= NULL;
10615  }
10616 
10617  set_dbname(name);
10618  set_tabname(name);
10619 
10620  if ((res= check_ndb_connection(thd)) != 0)
10621  {
10622  local_close(thd, FALSE);
10623  DBUG_RETURN(res);
10624  }
10625 
10626  // Init table lock structure
10627  /* ndb_share reference handler */
10628  if ((m_share=get_share(name, table, FALSE)) == 0)
10629  {
10633  if (opt_ndb_extra_logging > 19)
10634  {
10635  sql_print_information("Calling ndbcluster_create_binlog_setup(%s) in ::open",
10636  name);
10637  }
10638  Ndb* ndb= check_ndb_in_thd(thd);
10639  ndbcluster_create_binlog_setup(thd, ndb, name, strlen(name),
10640  m_dbname, m_tabname, table);
10641  if ((m_share=get_share(name, table, FALSE)) == 0)
10642  {
10643  local_close(thd, FALSE);
10644  DBUG_RETURN(1);
10645  }
10646  }
10647 
10648  DBUG_PRINT("NDB_SHARE", ("%s handler use_count: %u",
10649  m_share->key, m_share->use_count));
10650  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);
10651 
10652  if ((res= get_metadata(thd, name)))
10653  {
10654  local_close(thd, FALSE);
10655  DBUG_RETURN(res);
10656  }
10657 
10658  if ((res= update_stats(thd, 1, true)) ||
10659  (res= info(HA_STATUS_CONST)))
10660  {
10661  local_close(thd, TRUE);
10662  DBUG_RETURN(res);
10663  }
10664  if (ndb_binlog_is_read_only())
10665  {
10666  table->db_stat|= HA_READ_ONLY;
10667  sql_print_information("table '%s' opened read only", name);
10668  }
10669  DBUG_RETURN(0);
10670 }
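/*
  Layout note for the m_key_fields setup above: one allocation holds a
  NULL-terminated array of n_keys+1 MY_BITMAP pointers immediately
  followed by the n_keys MY_BITMAP structs they point to,

    [ MY_BITMAP* x n_keys | NULL | MY_BITMAP x n_keys ]

  so the whole structure can be released with a single my_free(), as
  done in local_close().
*/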
10671 
10672 /*
10673  * Support for OPTIMIZE TABLE
10674  * reclaims unused space of deleted rows
10675  * and updates index statistics
10676  */
10677 int ha_ndbcluster::optimize(THD* thd, HA_CHECK_OPT* check_opt)
10678 {
10679  ulong error, stats_error= 0;
10680  const uint delay= (uint)THDVAR(thd, optimization_delay);
10681 
10682  error= ndb_optimize_table(thd, delay);
10683  stats_error= update_stats(thd, 1);
10684  return (error) ? error : stats_error;
10685 }
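/*
  Typical trigger for the code above (hypothetical table name):

    SET ndb_optimization_delay = 10;  -- milliseconds between batches
    OPTIMIZE TABLE t1;
*/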
10686 
10687 int ha_ndbcluster::ndb_optimize_table(THD* thd, uint delay)
10688 {
10689  Thd_ndb *thd_ndb= get_thd_ndb(thd);
10690  Ndb *ndb= thd_ndb->ndb;
10691  NDBDICT *dict= ndb->getDictionary();
10692  int result=0, error= 0;
10693  uint i;
10694  NdbDictionary::OptimizeTableHandle th;
10695  NdbDictionary::OptimizeIndexHandle ih;
10696 
10697  DBUG_ENTER("ndb_optimize_table");
10698  if ((error= dict->optimizeTable(*m_table, th)))
10699  {
10700  DBUG_PRINT("info",
10701  ("Optimze table %s returned %d", m_tabname, error));
10702  ERR_RETURN(ndb->getNdbError());
10703  }
10704  while((result= th.next()) == 1)
10705  {
10706  if (thd->killed)
10707  DBUG_RETURN(-1);
10708  my_sleep(1000*delay);
10709  }
10710  if (result == -1 || th.close() == -1)
10711  {
10712  DBUG_PRINT("info",
10713  ("Optimize table %s did not complete", m_tabname));
10714  ERR_RETURN(ndb->getNdbError());
10715  }
10716  for (i= 0; i < MAX_KEY; i++)
10717  {
10718  if (thd->killed)
10719  DBUG_RETURN(-1);
10720  if (m_index[i].status == ACTIVE)
10721  {
10722  const NdbDictionary::Index *index= m_index[i].index;
10723  const NdbDictionary::Index *unique_index= m_index[i].unique_index;
10724 
10725  if (index)
10726  {
10727  if ((error= dict->optimizeIndex(*index, ih)))
10728  {
10729  DBUG_PRINT("info",
10730  ("Optimze index %s returned %d",
10731  index->getName(), error));
10732  ERR_RETURN(ndb->getNdbError());
10733 
10734  }
10735  while((result= ih.next()) == 1)
10736  {
10737  if (thd->killed)
10738  DBUG_RETURN(-1);
10739  my_sleep(1000*delay);
10740  }
10741  if (result == -1 || ih.close() == -1)
10742  {
10743  DBUG_PRINT("info",
10744  ("Optimize index %s did not complete", index->getName()));
10745  ERR_RETURN(ndb->getNdbError());
10746  }
10747  }
10748  if (unique_index)
10749  {
10750  if ((error= dict->optimizeIndex(*unique_index, ih)))
10751  {
10752  DBUG_PRINT("info",
10753  ("Optimze unique index %s returned %d",
10754  unique_index->getName(), error));
10755  ERR_RETURN(ndb->getNdbError());
10756  }
10757  while((result= ih.next()) == 1)
10758  {
10759  if (thd->killed)
10760  DBUG_RETURN(-1);
10761  my_sleep(1000*delay);
10762  }
10763  if (result == -1 || ih.close() == -1)
10764  {
10765  DBUG_PRINT("info",
10766  ("Optimize index %s did not complete", index->getName()));
10767  ERR_RETURN(ndb->getNdbError());
10768  }
10769  }
10770  }
10771  }
10772  DBUG_RETURN(0);
10773 }
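/*
  Note on the handle protocol the loops above rely on: next() on an
  optimize handle returns 1 while more batches of work remain, -1 on
  error, and anything else (normally 0) on completion; close() releases
  the handle and may also report failure. The per-batch sleep comes from
  the ndb_optimization_delay session variable, in milliseconds.
*/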
10774 
10775 int ha_ndbcluster::analyze(THD* thd, HA_CHECK_OPT* check_opt)
10776 {
10777  int err;
10778  if ((err= update_stats(thd, 1)) != 0)
10779  return err;
10780  const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
10781  THDVAR(thd, index_stat_enable);
10782  if (index_stat_enable)
10783  {
10784  if ((err= analyze_index(thd)) != 0)
10785  return err;
10786  }
10787  return 0;
10788 }
10789 
10790 int
10791 ha_ndbcluster::analyze_index(THD *thd)
10792 {
10793  DBUG_ENTER("ha_ndbcluster::analyze_index");
10794 
10795  Thd_ndb *thd_ndb= get_thd_ndb(thd);
10796  Ndb *ndb= thd_ndb->ndb;
10797 
10798  uint inx_list[MAX_INDEXES];
10799  uint inx_count= 0;
10800 
10801  uint inx;
10802  for (inx= 0; inx < table_share->keys; inx++)
10803  {
10804  NDB_INDEX_TYPE idx_type= get_index_type(inx);
10805 
10806  if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
10807  idx_type == UNIQUE_ORDERED_INDEX ||
10808  idx_type == ORDERED_INDEX))
10809  {
10810  if (inx_count < MAX_INDEXES)
10811  inx_list[inx_count++]= inx;
10812  }
10813  }
10814 
10815  if (inx_count != 0)
10816  {
10817  int err= ndb_index_stat_analyze(ndb, inx_list, inx_count);
10818  if (err != 0)
10819  DBUG_RETURN(err);
10820  }
10821  DBUG_RETURN(0);
10822 }
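/*
  Note: analyze() above is reached via ANALYZE TABLE. Ordinary table
  statistics are always refreshed through update_stats(); the per-index
  statistics pass runs only when index_stat_enable is set both globally
  and in the current session, as checked by the THDVAR pair above.
*/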
10823 
10824 /*
10825  Set partition info
10826 
10827  SYNOPSIS
10828  set_part_info()
10829  part_info
10830 
10831  RETURN VALUE
10832  NONE
10833 
10834  DESCRIPTION
10835  Set up partition info when handler object created
10836 */
10837 
10838 void ha_ndbcluster::set_part_info(partition_info *part_info, bool early)
10839 {
10840  DBUG_ENTER("ha_ndbcluster::set_part_info");
10841  m_part_info= part_info;
10842  if (!early)
10843  {
10844  m_use_partition_pruning= FALSE;
10845  if (!(m_part_info->part_type == HASH_PARTITION &&
10846  m_part_info->list_of_part_fields &&
10847  !m_part_info->is_sub_partitioned()))
10848  {
10849  /*
10850  PARTITION BY HASH, RANGE and LIST plus all subpartitioning variants
10851  all use MySQL defined partitioning. PARTITION BY KEY uses NDB native
10852  partitioning scheme.
10853  */
10854  m_use_partition_pruning= TRUE;
10855  m_user_defined_partitioning= TRUE;
10856  }
10857  if (m_part_info->part_type == HASH_PARTITION &&
10858  m_part_info->list_of_part_fields &&
10859  partition_info_num_full_part_fields(m_part_info) == 0)
10860  {
10861  /*
10862  CREATE TABLE t (....) ENGINE NDB PARTITION BY KEY();
10863  where no primary key is defined uses a hidden key as partition field
10864  and this makes it impossible to use any partition pruning. Partition
10865  pruning requires partitioning based on real fields. Also, the lack of
10866  a primary key means that all accesses to tables are based on either
10867  full table scans or index scans, and those scans can never be pruned
10868  given that the hidden key is unknown. In write_row, update_row,
10869  and delete_row the normal hidden key handling will fix things.
10870  */
10871  m_use_partition_pruning= FALSE;
10872  }
10873  DBUG_PRINT("info", ("m_use_partition_pruning = %d",
10874  m_use_partition_pruning));
10875  }
10876  DBUG_VOID_RETURN;
10877 }
10878 
10884 void ha_ndbcluster::local_close(THD *thd, bool release_metadata_flag)
10885 {
10886  Ndb *ndb;
10887  DBUG_ENTER("ha_ndbcluster::local_close");
10888  if (m_key_fields)
10889  {
10890  MY_BITMAP **inx_bitmap;
10891  for (inx_bitmap= m_key_fields;
10892  (inx_bitmap != NULL) && ((*inx_bitmap) != NULL);
10893  inx_bitmap++)
10894  if ((*inx_bitmap)->bitmap != m_pk_bitmap_buf)
10895  bitmap_free(*inx_bitmap);
10896  my_free((char*)m_key_fields, MYF(0));
10897  m_key_fields= NULL;
10898  }
10899  if (m_share)
10900  {
10901  /* ndb_share reference handler free */
10902  DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u",
10903  m_share->key, m_share->use_count));
10904  free_share(&m_share);
10905  }
10906  m_share= 0;
10907  if (release_metadata_flag)
10908  {
10909  ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
10910  release_metadata(thd, ndb);
10911  }
10912  DBUG_VOID_RETURN;
10913 }
10914 
10915 int ha_ndbcluster::close(void)
10916 {
10917  DBUG_ENTER("close");
10918  THD *thd= table->in_use;
10919  local_close(thd, TRUE);
10920  DBUG_RETURN(0);
10921 }
10922 
10923 
10924 int ha_ndbcluster::check_ndb_connection(THD* thd)
10925 {
10926  Ndb *ndb;
10927  DBUG_ENTER("check_ndb_connection");
10928 
10929  if (!(ndb= check_ndb_in_thd(thd, true)))
10930  DBUG_RETURN(HA_ERR_NO_CONNECTION);
10931  if (ndb->setDatabaseName(m_dbname))
10932  {
10933  ERR_RETURN(ndb->getNdbError());
10934  }
10935  DBUG_RETURN(0);
10936 }
10937 
10938 
10939 static int ndbcluster_close_connection(handlerton *hton, THD *thd)
10940 {
10941  Thd_ndb *thd_ndb= get_thd_ndb(thd);
10942  DBUG_ENTER("ndbcluster_close_connection");
10943  if (thd_ndb)
10944  {
10945  Thd_ndb::release(thd_ndb);
10946  thd_set_thd_ndb(thd, NULL);
10947  }
10948  DBUG_RETURN(0);
10949 }
10950 
10951 
10956 int ndbcluster_discover(handlerton *hton, THD* thd, const char *db,
10957  const char *name,
10958  uchar **frmblob,
10959  size_t *frmlen)
10960 {
10961  int error= 0;
10962  NdbError ndb_error;
10963  size_t len;
10964  uchar* data= NULL;
10965  Ndb* ndb;
10966  char key[FN_REFLEN + 1];
10967  DBUG_ENTER("ndbcluster_discover");
10968  DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
10969 
10970  if (!(ndb= check_ndb_in_thd(thd)))
10971  DBUG_RETURN(HA_ERR_NO_CONNECTION);
10972  if (ndb->setDatabaseName(db))
10973  {
10974  ERR_RETURN(ndb->getNdbError());
10975  }
10976  NDBDICT* dict= ndb->getDictionary();
10977  build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
10978  /* ndb_share reference temporary */
10979  NDB_SHARE *share= get_share(key, 0, FALSE);
10980  if (share)
10981  {
10982  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
10983  share->key, share->use_count));
10984  }
10985  if (share && get_ndb_share_state(share) == NSS_ALTERED)
10986  {
10987  // Frm has been altered on disk, but not yet written to ndb
10988  if (readfrm(key, &data, &len))
10989  {
10990  DBUG_PRINT("error", ("Could not read frm"));
10991  error= 1;
10992  goto err;
10993  }
10994  }
10995  else
10996  {
10997  Ndb_table_guard ndbtab_g(dict, name);
10998  const NDBTAB *tab= ndbtab_g.get_table();
10999  if (!tab)
11000  {
11001  const NdbError err= dict->getNdbError();
11002  if (err.code == 709 || err.code == 723)
11003  {
11004  error= -1;
11005  DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
11006  }
11007  else
11008  {
11009  error= -1;
11010  ndb_error= err;
11011  DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
11012  }
11013  goto err;
11014  }
11015  DBUG_PRINT("info", ("Found table %s", tab->getName()));
11016 
11017  len= tab->getFrmLength();
11018  if (len == 0 || tab->getFrmData() == NULL)
11019  {
11020  DBUG_PRINT("error", ("No frm data found."));
11021  error= 1;
11022  goto err;
11023  }
11024 
11025  if (unpackfrm(&data, &len, (uchar*) tab->getFrmData()))
11026  {
11027  DBUG_PRINT("error", ("Could not unpack table"));
11028  error= 1;
11029  goto err;
11030  }
11031  }
11032 #ifdef HAVE_NDB_BINLOG
11033  if (ndbcluster_check_if_local_table(db, name) &&
11034  !Ndb_dist_priv_util::is_distributed_priv_table(db, name))
11035  {
11036  DBUG_PRINT("info", ("ndbcluster_discover: Skipping locally defined table '%s.%s'",
11037  db, name));
11038  sql_print_error("ndbcluster_discover: Skipping locally defined table '%s.%s'",
11039  db, name);
11040  error= 1;
11041  goto err;
11042  }
11043 #endif
11044  *frmlen= len;
11045  *frmblob= data;
11046 
11047  if (share)
11048  {
11049  /* ndb_share reference temporary free */
11050  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
11051  share->key, share->use_count));
11052  free_share(&share);
11053  }
11054 
11055  DBUG_RETURN(0);
11056 err:
11057  my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
11058  if (share)
11059  {
11060  /* ndb_share reference temporary free */
11061  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
11062  share->key, share->use_count));
11063  free_share(&share);
11064  }
11065 
11066  if (ndb_error.code)
11067  {
11068  ERR_RETURN(ndb_error);
11069  }
11070  DBUG_RETURN(error);
11071 }
11072 
11077 int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd,
11078  const char *db,
11079  const char *name)
11080 {
11081  Ndb* ndb;
11082  DBUG_ENTER("ndbcluster_table_exists_in_engine");
11083  DBUG_PRINT("enter", ("db: %s name: %s", db, name));
11084 
11085  if (!(ndb= check_ndb_in_thd(thd)))
11086  DBUG_RETURN(HA_ERR_NO_CONNECTION);
11087  NDBDICT* dict= ndb->getDictionary();
11088  NDBDICT::List list;
11089  if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
11090  {
11091  ERR_RETURN(dict->getNdbError());
11092  }
11093  for (uint i= 0 ; i < list.count ; i++)
11094  {
11095  NDBDICT::List::Element& elmt= list.elements[i];
11096  if (my_strcasecmp(table_alias_charset, elmt.database, db))
11097  continue;
11098  if (my_strcasecmp(table_alias_charset, elmt.name, name))
11099  continue;
11100  DBUG_PRINT("info", ("Found table"));
11101  DBUG_RETURN(HA_ERR_TABLE_EXIST);
11102  }
11103  DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
11104 }
11105 
11106 
11107 
11108 extern "C" uchar* tables_get_key(const char *entry, size_t *length,
11109  my_bool not_used __attribute__((unused)))
11110 {
11111  *length= strlen(entry);
11112  return (uchar*) entry;
11113 }
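/*
  tables_get_key() is the my_hash_get_key callback passed to my_hash_init()
  below: each hash element is a NUL-terminated table name, so the key is
  the string itself with its length returned through *length.
*/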
11114 
11115 
11122 int ndbcluster_drop_database_impl(THD *thd, const char *path)
11123 {
11124  DBUG_ENTER("ndbcluster_drop_database_impl");
11125  char dbname[FN_HEADLEN];
11126  Ndb* ndb;
11127  NDBDICT::List list;
11128  uint i;
11129  char *tabname;
11130  List<char> drop_list;
11131  int ret= 0;
11132  ha_ndbcluster::set_dbname(path, (char *)&dbname);
11133  DBUG_PRINT("enter", ("db: %s", dbname));
11134 
11135  if (!(ndb= check_ndb_in_thd(thd)))
11136  DBUG_RETURN(-1);
11137 
11138  // List tables in NDB
11139  NDBDICT *dict= ndb->getDictionary();
11140  if (dict->listObjects(list,
11141  NdbDictionary::Object::UserTable) != 0)
11142  DBUG_RETURN(-1);
11143  for (i= 0 ; i < list.count ; i++)
11144  {
11145  NDBDICT::List::Element& elmt= list.elements[i];
11146  DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
11147 
11148  // Add only tables that belong to db
11149  // Ignore Blob part tables - they are deleted when their table
11150  // is deleted.
11151  if (my_strcasecmp(system_charset_info, elmt.database, dbname) ||
11152  IS_NDB_BLOB_PREFIX(elmt.name))
11153  continue;
11154  DBUG_PRINT("info", ("%s must be dropped", elmt.name));
11155  drop_list.push_back(thd->strdup(elmt.name));
11156  }
11157  // Drop any tables belonging to database
11158  char full_path[FN_REFLEN + 1];
11159  char *tmp= full_path +
11160  build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0);
11161  if (ndb->setDatabaseName(dbname))
11162  {
11163  ERR_RETURN(ndb->getNdbError());
11164  }
11165  List_iterator_fast<char> it(drop_list);
11166  while ((tabname=it++))
11167  {
11168  tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1);
11169  if (ha_ndbcluster::drop_table_impl(thd, 0, ndb, full_path, dbname, tabname))
11170  {
11171  const NdbError err= dict->getNdbError();
11172  if (err.code != 709 && err.code != 723)
11173  {
11174  ret= ndb_to_mysql_error(&err);
11175  }
11176  }
11177  }
11178 
11179  dict->invalidateDbGlobal(dbname);
11180  DBUG_RETURN(ret);
11181 }
11182 
11183 static void ndbcluster_drop_database(handlerton *hton, char *path)
11184 {
11185  THD *thd= current_thd;
11186  DBUG_ENTER("ndbcluster_drop_database");
11187  /*
11188  Don't allow drop database unless
11189  schema distribution table is setup
11190  */
11191  if (!ndb_schema_share)
11192  {
11193  DBUG_PRINT("info", ("Schema distribution table not setup"));
11194  DBUG_VOID_RETURN;
11195  }
11196  ndbcluster_drop_database_impl(thd, path);
11197  char db[FN_REFLEN];
11198  ha_ndbcluster::set_dbname(path, db);
11199  uint32 table_id= 0, table_version= 0;
11200  /*
11201  Since databases aren't real ndb schema objects
11202  they don't have any id/version.
11203 
11204  But since that id/version is used to make sure that events on SCHEMA_TABLE
11205  are correct, we set random numbers
11206  */
11207  table_id = (uint32)rand();
11208  table_version = (uint32)rand();
11209  ndbcluster_log_schema_op(thd,
11210  thd->query(), thd->query_length(),
11211  db, "", table_id, table_version,
11212  SOT_DROP_DB, NULL, NULL);
11213  DBUG_VOID_RETURN;
11214 }
11215 
11216 int ndb_create_table_from_engine(THD *thd, const char *db,
11217  const char *table_name)
11218 {
11219  // Copy db and table_name to stack buffers since functions used by
11220  // ha_create_table_from_engine may convert to lowercase on some platforms
11221  char db_buf[FN_REFLEN + 1];
11222  char table_name_buf[FN_REFLEN + 1];
11223  strnmov(db_buf, db, sizeof(db_buf));
11224  strnmov(table_name_buf, table_name, sizeof(table_name_buf));
11225 
11226  LEX *old_lex= thd->lex, newlex;
11227  thd->lex= &newlex;
11228  newlex.current_select= NULL;
11229  lex_start(thd);
11230  int res= ha_create_table_from_engine(thd, db_buf, table_name_buf);
11231  thd->lex= old_lex;
11232  return res;
11233 }
11234 
11235 /*
11236  find all tables in ndb and discover those needed
11237 */
11238 int ndbcluster_find_all_files(THD *thd)
11239 {
11240  Ndb* ndb;
11241  char key[FN_REFLEN + 1];
11242  NDBDICT *dict;
11243  int unhandled, retries= 5, skipped;
11244  DBUG_ENTER("ndbcluster_find_all_files");
11245 
11246  if (!(ndb= check_ndb_in_thd(thd)))
11247  DBUG_RETURN(HA_ERR_NO_CONNECTION);
11248 
11249  dict= ndb->getDictionary();
11250 
11251  LINT_INIT(unhandled);
11252  LINT_INIT(skipped);
11253  do
11254  {
11255  NDBDICT::List list;
11256  if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
11257  ERR_RETURN(dict->getNdbError());
11258  unhandled= 0;
11259  skipped= 0;
11260  retries--;
11261  for (uint i= 0 ; i < list.count ; i++)
11262  {
11263  NDBDICT::List::Element& elmt= list.elements[i];
11264  if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
11265  {
11266  DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
11267  continue;
11268  }
11269  DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name));
11270  if (elmt.state != NDBOBJ::StateOnline &&
11271  elmt.state != NDBOBJ::StateBackup &&
11272  elmt.state != NDBOBJ::StateBuilding)
11273  {
11274  sql_print_information("NDB: skipping setup table %s.%s, in state %d",
11275  elmt.database, elmt.name, elmt.state);
11276  skipped++;
11277  continue;
11278  }
11279 
11280  ndb->setDatabaseName(elmt.database);
11281  Ndb_table_guard ndbtab_g(dict, elmt.name);
11282  const NDBTAB *ndbtab= ndbtab_g.get_table();
11283  if (!ndbtab)
11284  {
11285  if (retries == 0)
11286  sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s",
11287  elmt.database, elmt.name,
11288  dict->getNdbError().code,
11289  dict->getNdbError().message);
11290  unhandled++;
11291  continue;
11292  }
11293 
11294  if (ndbtab->getFrmLength() == 0)
11295  continue;
11296 
11297  /* check if database exists */
11298  char *end= key +
11299  build_table_filename(key, sizeof(key) - 1, elmt.database, "", "", 0);
11300  if (my_access(key, F_OK))
11301  {
11302  /* no such database defined, skip table */
11303  continue;
11304  }
11305  /* finalize construction of path */
11306  end+= tablename_to_filename(elmt.name, end,
11307  sizeof(key)-(end-key));
11308  uchar *data= 0, *pack_data= 0;
11309  size_t length, pack_length;
11310  int discover= 0;
11311  if (readfrm(key, &data, &length) ||
11312  packfrm(data, length, &pack_data, &pack_length))
11313  {
11314  discover= 1;
11315  sql_print_information("NDB: missing frm for %s.%s, discovering...",
11316  elmt.database, elmt.name);
11317  }
11318  else if (cmp_frm(ndbtab, pack_data, pack_length))
11319  {
11320  /* ndb_share reference temporary */
11321  NDB_SHARE *share= get_share(key, 0, FALSE);
11322  if (share)
11323  {
11324  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
11325  share->key, share->use_count));
11326  }
11327  if (!share || get_ndb_share_state(share) != NSS_ALTERED)
11328  {
11329  discover= 1;
11330  sql_print_information("NDB: mismatch in frm for %s.%s, discovering...",
11331  elmt.database, elmt.name);
11332  }
11333  if (share)
11334  {
11335  /* ndb_share reference temporary free */
11336  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
11337  share->key, share->use_count));
11338  free_share(&share);
11339  }
11340  }
11341  my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR));
11342  my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR));
11343 
11344  if (discover)
11345  {
11346  /* ToDo 4.1 database needs to be created if missing */
11347  if (ndb_create_table_from_engine(thd, elmt.database, elmt.name))
11348  {
11349  /* ToDo 4.1 handle error */
11350  }
11351  }
11352  else
11353  {
11354  /* set up replication for this table */
11355  ndbcluster_create_binlog_setup(thd, ndb, key, end-key,
11356  elmt.database, elmt.name,
11357  0);
11358  }
11359  }
11360  }
11361  while (unhandled && retries);
11362 
11363  DBUG_RETURN(-(skipped + unhandled));
11364 }
11365 
11366 
11367 static int
11368 ndbcluster_find_files(handlerton *hton, THD *thd,
11369  const char *db, const char *path,
11370  const char *wild, bool dir, List<LEX_STRING> *files)
11371 {
11372  DBUG_ENTER("ndbcluster_find_files");
11373  DBUG_PRINT("enter", ("db: %s", db));
11374  { // extra bracket to avoid gcc 2.95.3 warning
11375  uint i;
11376  Thd_ndb *thd_ndb;
11377  Ndb* ndb;
11378  char name[FN_REFLEN + 1];
11379  HASH ndb_tables, ok_tables;
11380  NDBDICT::List list;
11381 
11382  if (!(ndb= check_ndb_in_thd(thd)))
11383  DBUG_RETURN(HA_ERR_NO_CONNECTION);
11384  thd_ndb= get_thd_ndb(thd);
11385 
11386  if (dir)
11387  DBUG_RETURN(0); // Discover of databases not yet supported
11388 
11389  Ndb_global_schema_lock_guard ndb_global_schema_lock_guard(thd);
11390  if (ndb_global_schema_lock_guard.lock())
11391  DBUG_RETURN(HA_ERR_NO_CONNECTION);
11392 
11393  // List tables in NDB
11394  NDBDICT *dict= ndb->getDictionary();
11395  if (dict->listObjects(list,
11396  NdbDictionary::Object::UserTable) != 0)
11397  ERR_RETURN(dict->getNdbError());
11398 
11399  if (my_hash_init(&ndb_tables, table_alias_charset,list.count,0,0,
11400  (my_hash_get_key)tables_get_key,0,0))
11401  {
11402  DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
11403  DBUG_RETURN(-1);
11404  }
11405 
11406  if (my_hash_init(&ok_tables, system_charset_info,32,0,0,
11407  (my_hash_get_key)tables_get_key,0,0))
11408  {
11409  DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
11410  my_hash_free(&ndb_tables);
11411  DBUG_RETURN(-1);
11412  }
11413 
11414  for (i= 0 ; i < list.count ; i++)
11415  {
11416  NDBDICT::List::Element& elmt= list.elements[i];
11417  if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
11418  {
11419  DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
11420  continue;
11421  }
11422  DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
11423 
11424  // Add only tables that belong to db
11425  if (my_strcasecmp(system_charset_info, elmt.database, db))
11426  continue;
11427 
11428  // Apply wildcard to list of tables in NDB
11429  if (wild)
11430  {
11431  if (lower_case_table_names)
11432  {
11433  if (wild_case_compare(files_charset_info, elmt.name, wild))
11434  continue;
11435  }
11436  else if (wild_compare(elmt.name,wild,0))
11437  continue;
11438  }
11439  DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));
11440  my_hash_insert(&ndb_tables, (uchar*)thd->strdup(elmt.name));
11441  }
11442 
11443  LEX_STRING *file_name;
11444  List_iterator<LEX_STRING> it(*files);
11445  List<char> delete_list;
11446  char *file_name_str;
11447  while ((file_name=it++))
11448  {
11449  bool file_on_disk= FALSE;
11450  DBUG_PRINT("info", ("%s", file_name->str));
11451  if (my_hash_search(&ndb_tables,
11452  (const uchar*)file_name->str, file_name->length))
11453  {
11454  build_table_filename(name, sizeof(name) - 1, db,
11455  file_name->str, reg_ext, 0);
11456  if (my_access(name, F_OK))
11457  {
11458  DBUG_PRINT("info", ("Table %s listed and needs discovery",
11459  file_name->str));
11460  if (ndb_create_table_from_engine(thd, db, file_name->str))
11461  {
11462  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
11463  ER_TABLE_EXISTS_ERROR,
11464  "Discover of table %s.%s failed",
11465  db, file_name->str);
11466  continue;
11467  }
11468  }
11469  DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str));
11470  file_on_disk= TRUE;
11471  }
11472 
11473  // Check for .ndb file with this name
11474  build_table_filename(name, sizeof(name) - 1, db,
11475  file_name->str, ha_ndb_ext, 0);
11476  DBUG_PRINT("info", ("Check access for %s", name));
11477  if (my_access(name, F_OK))
11478  {
11479  DBUG_PRINT("info", ("%s did not exist on disk", name));
11480  // .ndb file did not exist on disk, another table type
11481  if (file_on_disk)
11482  {
11483  // Ignore this ndb table
11484  uchar *record= my_hash_search(&ndb_tables,
11485  (const uchar*) file_name->str,
11486  file_name->length);
11487  DBUG_ASSERT(record);
11488  my_hash_delete(&ndb_tables, record);
11489  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
11490  ER_TABLE_EXISTS_ERROR,
11491  "Local table %s.%s shadows ndb table",
11492  db, file_name->str);
11493  }
11494  continue;
11495  }
11496  if (file_on_disk)
11497  {
11498  // File existed in NDB and as frm file, put in ok_tables list
11499  my_hash_insert(&ok_tables, (uchar*) file_name->str);
11500  continue;
11501  }
11502  DBUG_PRINT("info", ("%s existed on disk", name));
11503  // The .ndb file exists on disk, but it's not in list of tables in ndb
11504  // Verify that handler agrees table is gone.
11505  if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) ==
11506  HA_ERR_NO_SUCH_TABLE)
11507  {
11508  DBUG_PRINT("info", ("NDB says %s does not exist", file_name->str));
11509  it.remove();
11510  // Put in list of tables to remove from disk
11511  delete_list.push_back(thd->strdup(file_name->str));
11512  }
11513  }
11514 
11515  /* setup logging to binlog for all discovered tables */
11516  {
11517  char *end, *end1= name +
11518  build_table_filename(name, sizeof(name) - 1, db, "", "", 0);
11519  for (i= 0; i < ok_tables.records; i++)
11520  {
11521  file_name_str= (char*)my_hash_element(&ok_tables, i);
11522  end= end1 +
11523  tablename_to_filename(file_name_str, end1, sizeof(name) - (end1 - name));
11524  ndbcluster_create_binlog_setup(thd, ndb, name, end-name,
11525  db, file_name_str, 0);
11526  }
11527  }
11528 
11529  // Check for new files to discover
11530  DBUG_PRINT("info", ("Checking for new files to discover"));
11531  List<char> create_list;
11532  for (i= 0 ; i < ndb_tables.records ; i++)
11533  {
11534  file_name_str= (char*) my_hash_element(&ndb_tables, i);
11535  if (!my_hash_search(&ok_tables,
11536  (const uchar*) file_name_str, strlen(file_name_str)))
11537  {
11538  build_table_filename(name, sizeof(name) - 1,
11539  db, file_name_str, reg_ext, 0);
11540  if (my_access(name, F_OK))
11541  {
11542  DBUG_PRINT("info", ("%s must be discovered", file_name_str));
11543  // File is in list of ndb tables and not in ok_tables
11544  // This table needs to be created
11545  create_list.push_back(thd->strdup(file_name_str));
11546  }
11547  }
11548  }
11549 
11550 #ifndef NDB_NO_MYSQL_RM_TABLE_PART2
11551  /*
11552  Delete old files
11553 
11554  ndbcluster_find_files() may be called from I_S code and ndbcluster_binlog
11555  thread in situations when some tables are already open. This means that
11556  code below will try to obtain exclusive metadata lock on some table
11557  while holding shared meta-data lock on other tables. This might lead to a
11558  deadlock but such a deadlock should be detected by MDL deadlock detector.
11559  */
11560  List_iterator_fast<char> it3(delete_list);
11561  while ((file_name_str= it3++))
11562  {
11563  DBUG_PRINT("info", ("Removing table %s/%s", db, file_name_str));
11564  // Delete the table and all related files
11565  TABLE_LIST table_list;
11566  table_list.init_one_table(db, strlen(db),
11567  file_name_str, strlen(file_name_str),
11568  file_name_str,
11569  TL_WRITE);
11570  table_list.mdl_request.set_type(MDL_EXCLUSIVE);
11571  /*
11572  Set the TNO_NO_NDB_DROP_TABLE flag so the ndb table is not dropped;
11573  it should not exist anyway.
11574  */
11575  thd_ndb->options|= TNO_NO_NDB_DROP_TABLE;
11576  (void)mysql_rm_table_part2(thd, &table_list,
11577  false, /* if_exists */
11578  false, /* drop_temporary */
11579  false, /* drop_view */
11580  true /* dont_log_query*/);
11581  thd_ndb->options&= ~TNO_NO_NDB_DROP_TABLE;
11582  trans_commit_implicit(thd); /* Safety, should be unnecessary. */
11583  thd->mdl_context.release_transactional_locks();
11584  /* Clear error message that is returned when table is deleted */
11585  thd->clear_error();
11586  }
11587 #endif
11588 
11589  // Create new files
11590  List_iterator_fast<char> it2(create_list);
11591  while ((file_name_str=it2++))
11592  {
11593  DBUG_PRINT("info", ("Table %s needs discovery", file_name_str));
11594  if (ndb_create_table_from_engine(thd, db, file_name_str) == 0)
11595  {
11596  LEX_STRING *tmp_file_name= 0;
11597  tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str,
11598  strlen(file_name_str), TRUE);
11599  files->push_back(tmp_file_name);
11600  }
11601  }
11602 
11603  my_hash_free(&ok_tables);
11604  my_hash_free(&ndb_tables);
11605 
11606  // Delete schema file from files
11607  if (!strcmp(db, NDB_REP_DB))
11608  {
11609  uint count = 0;
11610  while (count++ < files->elements)
11611  {
11612  file_name = (LEX_STRING *)files->pop();
11613  if (!strcmp(file_name->str, NDB_SCHEMA_TABLE))
11614  {
11615  DBUG_PRINT("info", ("skip %s.%s table, it should be hidden from the user",
11616  NDB_REP_DB, NDB_SCHEMA_TABLE));
11617  continue;
11618  }
11619  files->push_back(file_name);
11620  }
11621  }
11622  } // extra bracket to avoid gcc 2.95.3 warning
11623  DBUG_RETURN(0);
11624 }
11625 
11626 
11627 /*
11628  Initialise all global variables before creating
11629  an NDB Cluster table handler
11630  */
11631 
11632 /* Call back after cluster connect */
11633 static int connect_callback()
11634 {
11635  pthread_mutex_lock(&LOCK_ndb_util_thread);
11636  update_status_variables(NULL, &g_ndb_status,
11637  g_ndb_cluster_connection);
11638 
11639  uint node_id, i= 0;
11640  Ndb_cluster_connection_node_iter node_iter;
11641  memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map));
11642  while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter)))
11643  g_node_id_map[node_id]= i++;
11644 
11645  pthread_cond_signal(&COND_ndb_util_thread);
11646  pthread_mutex_unlock(&LOCK_ndb_util_thread);
11647  return 0;
11648 }
11649 
11650 #ifndef NDB_NO_WAIT_SETUP
11651 static int ndb_wait_setup_func_impl(ulong max_wait)
11652 {
11653  DBUG_ENTER("ndb_wait_setup_func_impl");
11654 
11655  pthread_mutex_lock(&ndbcluster_mutex);
11656 
11657  struct timespec abstime;
11658  set_timespec(abstime, 1);
11659 
11660  while (!ndb_setup_complete && max_wait)
11661  {
11662  int rc= pthread_cond_timedwait(&COND_ndb_setup_complete,
11663  &ndbcluster_mutex,
11664  &abstime);
11665  if (rc)
11666  {
11667  if (rc == ETIMEDOUT)
11668  {
11669  DBUG_PRINT("info", ("1s elapsed waiting"));
11670  max_wait--;
11671  set_timespec(abstime, 1); /* 1 second from now*/
11672  }
11673  else
11674  {
11675  DBUG_PRINT("info", ("Bad pthread_cond_timedwait rc : %u",
11676  rc));
11677  assert(false);
11678  break;
11679  }
11680  }
11681  }
11682 
11683  pthread_mutex_unlock(&ndbcluster_mutex);
11684 
11685  DBUG_RETURN((ndb_setup_complete == 1)? 0 : 1);
11686 }
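/*
  Note on the loop above: pthread_cond_timedwait() takes an absolute
  deadline, so after every ETIMEDOUT wakeup the deadline is re-armed with
  set_timespec(abstime, 1) and the remaining one-second budget in max_wait
  is decremented; otherwise every later wait would return immediately.
*/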
11687 
11688 int(*ndb_wait_setup_func)(ulong) = 0;
11689 #endif
11690 extern int ndb_dictionary_is_mysqld;
11691 
11692 static int ndbcluster_init(void *p)
11693 {
11694  DBUG_ENTER("ndbcluster_init");
11695 
11696  if (ndbcluster_inited)
11697  DBUG_RETURN(FALSE);
11698 
11699  pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
11700  pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST);
11701  pthread_cond_init(&COND_ndb_util_thread, NULL);
11702  pthread_cond_init(&COND_ndb_util_ready, NULL);
11703  pthread_cond_init(&COND_ndb_setup_complete, NULL);
11704  ndb_util_thread_running= -1;
11705  pthread_mutex_init(&LOCK_ndb_index_stat_thread, MY_MUTEX_INIT_FAST);
11706  pthread_cond_init(&COND_ndb_index_stat_thread, NULL);
11707  pthread_cond_init(&COND_ndb_index_stat_ready, NULL);
11708  pthread_mutex_init(&ndb_index_stat_list_mutex, MY_MUTEX_INIT_FAST);
11709  pthread_mutex_init(&ndb_index_stat_stat_mutex, MY_MUTEX_INIT_FAST);
11710  pthread_cond_init(&ndb_index_stat_stat_cond, NULL);
11711  ndb_index_stat_thread_running= -1;
11712  ndbcluster_terminating= 0;
11713  ndb_dictionary_is_mysqld= 1;
11714  ndb_setup_complete= 0;
11715  ndbcluster_hton= (handlerton *)p;
11716  ndbcluster_global_schema_lock_init(ndbcluster_hton);
11717 
11718  {
11719  handlerton *h= ndbcluster_hton;
11720  h->state= SHOW_OPTION_YES;
11721  h->db_type= DB_TYPE_NDBCLUSTER;
11722  h->close_connection= ndbcluster_close_connection;
11723  h->commit= ndbcluster_commit;
11724  h->rollback= ndbcluster_rollback;
11725  h->create= ndbcluster_create_handler; /* Create a new handler */
11726  h->drop_database= ndbcluster_drop_database; /* Drop a database */
11727  h->panic= ndbcluster_end; /* Panic call */
11728  h->show_status= ndbcluster_show_status; /* Show status */
11729  h->alter_tablespace= ndbcluster_alter_tablespace; /* Alter tablespace */
11730  h->partition_flags= ndbcluster_partition_flags; /* Partition flags */
11731  h->alter_table_flags=
11732  ndbcluster_alter_table_flags; /* Alter table flags */
11733 #if MYSQL_VERSION_ID >= 50501
11734  h->fill_is_table= ndbcluster_fill_is_table;
11735 #else
11736  h->fill_files_table= ndbcluster_fill_files_table;
11737 #endif
11738  ndbcluster_binlog_init_handlerton();
11739  h->flags= HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED;
11740  h->discover= ndbcluster_discover;
11741  h->find_files= ndbcluster_find_files;
11742  h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
11743 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
11744  h->make_pushed_join= ndbcluster_make_pushed_join;
11745 #endif
11746  }
11747 
11748  // Initialize ndb interface
11749  ndb_init_internal();
11750 
11751  /* allocate connection resources and connect to cluster */
11752  const uint global_opti_node_select= THDVAR(NULL, optimized_node_selection);
11753  if (ndbcluster_connect(connect_callback, opt_ndb_wait_connected,
11754  opt_ndb_cluster_connection_pool,
11755  (global_opti_node_select & 1),
11756  opt_ndb_connectstring,
11757  opt_ndb_nodeid))
11758  {
11759  DBUG_PRINT("error", ("Could not initiate connection to cluster"));
11760  goto ndbcluster_init_error;
11761  }
11762 
11763  (void) my_hash_init(&ndbcluster_open_tables,table_alias_charset,32,0,0,
11764  (my_hash_get_key) ndbcluster_get_key,0,0);
11765  /* start the ndb injector thread */
11766  if (ndbcluster_binlog_start())
11767  {
11768  DBUG_PRINT("error", ("Could not start the injector thread"));
11769  goto ndbcluster_init_error;
11770  }
11771 
11772  // Create utility thread
11773  pthread_t tmp;
11774  if (pthread_create(&tmp, &connection_attrib, ndb_util_thread_func, 0))
11775  {
11776  DBUG_PRINT("error", ("Could not create ndb utility thread"));
11777  my_hash_free(&ndbcluster_open_tables);
11778  pthread_mutex_destroy(&ndbcluster_mutex);
11779  pthread_mutex_destroy(&LOCK_ndb_util_thread);
11780  pthread_cond_destroy(&COND_ndb_util_thread);
11781  pthread_cond_destroy(&COND_ndb_util_ready);
11782  pthread_cond_destroy(&COND_ndb_setup_complete);
11783  goto ndbcluster_init_error;
11784  }
11785 
11786  /* Wait for the util thread to start */
11787  pthread_mutex_lock(&LOCK_ndb_util_thread);
11788  while (ndb_util_thread_running < 0)
11789  pthread_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread);
11790  pthread_mutex_unlock(&LOCK_ndb_util_thread);
11791 
11792  if (!ndb_util_thread_running)
11793  {
11794  DBUG_PRINT("error", ("ndb utility thread exited prematurely"));
11795  my_hash_free(&ndbcluster_open_tables);
11796  pthread_mutex_destroy(&ndbcluster_mutex);
11797  pthread_mutex_destroy(&LOCK_ndb_util_thread);
11798  pthread_cond_destroy(&COND_ndb_util_thread);
11799  pthread_cond_destroy(&COND_ndb_util_ready);
11800  pthread_cond_destroy(&COND_ndb_setup_complete);
11801  goto ndbcluster_init_error;
11802  }
11803 
11804  // Create index statistics thread
11805  pthread_t tmp2;
11806  if (pthread_create(&tmp2, &connection_attrib, ndb_index_stat_thread_func, 0))
11807  {
11808  DBUG_PRINT("error", ("Could not create ndb index statistics thread"));
11809  my_hash_free(&ndbcluster_open_tables);
11810  pthread_mutex_destroy(&ndbcluster_mutex);
11811  pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11812  pthread_cond_destroy(&COND_ndb_index_stat_thread);
11813  pthread_cond_destroy(&COND_ndb_index_stat_ready);
11814  pthread_mutex_destroy(&ndb_index_stat_list_mutex);
11815  pthread_mutex_destroy(&ndb_index_stat_stat_mutex);
11816  pthread_cond_destroy(&ndb_index_stat_stat_cond);
11817  goto ndbcluster_init_error;
11818  }
11819 
11820  /* Wait for the index statistics thread to start */
11821  pthread_mutex_lock(&LOCK_ndb_index_stat_thread);
11822  while (ndb_index_stat_thread_running < 0)
11823  pthread_cond_wait(&COND_ndb_index_stat_ready, &LOCK_ndb_index_stat_thread);
11824  pthread_mutex_unlock(&LOCK_ndb_index_stat_thread);
11825 
11826  if (!ndb_index_stat_thread_running)
11827  {
11828  DBUG_PRINT("error", ("ndb index statistics thread exited prematurely"));
11829  my_hash_free(&ndbcluster_open_tables);
11830  pthread_mutex_destroy(&ndbcluster_mutex);
11831  pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11832  pthread_cond_destroy(&COND_ndb_index_stat_thread);
11833  pthread_cond_destroy(&COND_ndb_index_stat_ready);
11834  pthread_mutex_destroy(&ndb_index_stat_list_mutex);
11835  pthread_mutex_destroy(&ndb_index_stat_stat_mutex);
11836  pthread_cond_destroy(&ndb_index_stat_stat_cond);
11837  goto ndbcluster_init_error;
11838  }
11839 
11840 #ifndef NDB_NO_WAIT_SETUP
11841  ndb_wait_setup_func= ndb_wait_setup_func_impl;
11842 #endif
11843 
11844  memset(&g_slave_api_client_stats, 0, sizeof(g_slave_api_client_stats));
11845 
11846  ndbcluster_inited= 1;
11847  DBUG_RETURN(FALSE);
11848 
11849 ndbcluster_init_error:
11850  /* disconnect from cluster and free connection resources */
11851  ndbcluster_disconnect();
11852  ndbcluster_hton->state= SHOW_OPTION_DISABLED; // If we couldn't use handler
11853 
11854  ndbcluster_global_schema_lock_deinit();
11855 
11856  DBUG_RETURN(TRUE);
11857 }
11858 
11859 #ifndef DBUG_OFF
11860 static
11861 const char*
11862 get_share_state_string(NDB_SHARE_STATE s)
11863 {
11864  switch(s) {
11865  case NSS_INITIAL:
11866  return "NSS_INITIAL";
11867  case NSS_ALTERED:
11868  return "NSS_ALTERED";
11869  case NSS_DROPPED:
11870  return "NSS_DROPPED";
11871  }
11872  assert(false);
11873  return "<unknown>";
11874 }
11875 #endif
11876 
11877 int ndbcluster_binlog_end(THD *thd);
11878 
11879 static int ndbcluster_end(handlerton *hton, ha_panic_function type)
11880 {
11881  DBUG_ENTER("ndbcluster_end");
11882 
11883  if (!ndbcluster_inited)
11884  DBUG_RETURN(0);
11885  ndbcluster_inited= 0;
11886 
11887  /* wait for index stat thread to finish */
11888  sql_print_information("Stopping Cluster Index Statistics thread");
11889  pthread_mutex_lock(&LOCK_ndb_index_stat_thread);
11890  ndbcluster_terminating= 1;
11891  pthread_cond_signal(&COND_ndb_index_stat_thread);
11892  while (ndb_index_stat_thread_running > 0)
11893  pthread_cond_wait(&COND_ndb_index_stat_ready, &LOCK_ndb_index_stat_thread);
11894  pthread_mutex_unlock(&LOCK_ndb_index_stat_thread);
11895 
11896  /* wait for util and binlog thread to finish */
11897  ndbcluster_binlog_end(NULL);
11898 
11899  {
11900  pthread_mutex_lock(&ndbcluster_mutex);
11901  uint save = ndbcluster_open_tables.records; (void)save;
11902  while (ndbcluster_open_tables.records)
11903  {
11904  NDB_SHARE *share=
11905  (NDB_SHARE*) my_hash_element(&ndbcluster_open_tables, 0);
11906 #ifndef DBUG_OFF
11907  fprintf(stderr,
11908  "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
11909  share->key, share->use_count,
11910  get_share_state_string(share->state),
11911  (uint)share->state);
11912 #endif
11913  ndbcluster_real_free_share(&share);
11914  }
11915  pthread_mutex_unlock(&ndbcluster_mutex);
11916  DBUG_ASSERT(save == 0);
11917  }
11918  my_hash_free(&ndbcluster_open_tables);
11919 
11920  ndb_index_stat_end();
11921  ndbcluster_disconnect();
11922 
11923  ndbcluster_global_schema_lock_deinit();
11924 
11925  // cleanup ndb interface
11926  ndb_end_internal();
11927 
11928  pthread_mutex_destroy(&ndbcluster_mutex);
11929  pthread_mutex_destroy(&LOCK_ndb_util_thread);
11930  pthread_cond_destroy(&COND_ndb_util_thread);
11931  pthread_cond_destroy(&COND_ndb_util_ready);
11932  pthread_cond_destroy(&COND_ndb_setup_complete);
11933  pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11934  pthread_cond_destroy(&COND_ndb_index_stat_thread);
11935  pthread_cond_destroy(&COND_ndb_index_stat_ready);
11936 
11937  DBUG_RETURN(0);
11938 }
11939 
11940 void ha_ndbcluster::print_error(int error, myf errflag)
11941 {
11942  DBUG_ENTER("ha_ndbcluster::print_error");
11943  DBUG_PRINT("enter", ("error: %d", error));
11944 
11945  if (error == HA_ERR_NO_PARTITION_FOUND)
11946  m_part_info->print_no_partition_found(table);
11947  else
11948  {
11949  if (error == HA_ERR_FOUND_DUPP_KEY &&
11950  (table == NULL || table->file == NULL))
11951  {
11952  /*
11953  This is a side effect of 'ndbcluster_print_error' (called from
11954  'ndbcluster_commit' and 'ndbcluster_rollback') which realises
11955  that it "knows nothing" and creates a brand new ha_ndbcluster
11956  in order to be able to call the print_error() function.
11957  Unfortunately the new ha_ndbcluster hasn't been open()ed
11958  and thus table pointer etc. is not set. Since handler::print_error()
11959  will use that pointer without checking for NULL(it naturally
11960  assumes an error can only be returned when the handler is open)
11961  this would crash the mysqld unless it's handled here.
11962  */
11963  my_error(ER_DUP_KEY, errflag, table_share->table_name.str, error);
11964  DBUG_VOID_RETURN;
11965  }
11966 
11967  handler::print_error(error, errflag);
11968  }
11969  DBUG_VOID_RETURN;
11970 }
11971 
11972 
11978 void ndbcluster_print_error(int error, const NdbOperation *error_op)
11979 {
11980  DBUG_ENTER("ndbcluster_print_error");
11981  TABLE_SHARE share;
11982  const char *tab_name= (error_op) ? error_op->getTableName() : "";
11983  if (tab_name == NULL)
11984  {
11985  DBUG_ASSERT(tab_name != NULL);
11986  tab_name= "";
11987  }
11988  share.db.str= (char*) "";
11989  share.db.length= 0;
11990  share.table_name.str= (char *) tab_name;
11991  share.table_name.length= strlen(tab_name);
11992  ha_ndbcluster error_handler(ndbcluster_hton, &share);
11993  error_handler.print_error(error, MYF(0));
11994  DBUG_VOID_RETURN;
11995 }
11996 
12001 void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
12002 {
12003  char *end, *ptr, *tmp_name;
12004  char tmp_buff[FN_REFLEN + 1];
12005 
12006  tmp_name= tmp_buff;
12007  /* Scan name from the end */
12008  ptr= strend(path_name)-1;
12009  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12010  ptr--;
12011  }
12012  ptr--;
12013  end= ptr;
12014  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12015  ptr--;
12016  }
12017  uint name_len= end - ptr;
12018  memcpy(tmp_name, ptr + 1, name_len);
12019  tmp_name[name_len]= '\0';
12020  filename_to_tablename(tmp_name, dbname, sizeof(tmp_buff) - 1);
12021 }
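
A quick illustration of the parsing above (the path is hypothetical, chosen only to show which component is extracted):

  char dbname[NAME_LEN + 1];
  ha_ndbcluster::set_dbname("/data/mysql/testdb/t1", dbname); // -> "testdb"
  // set_tabname() below applies the same scan to the last component -> "t1"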
12022 
12027 void ha_ndbcluster::set_dbname(const char *path_name)
12028 {
12029  set_dbname(path_name, m_dbname);
12030 }
12031 
12036 void
12037 ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
12038 {
12039  char *end, *ptr, *tmp_name;
12040  char tmp_buff[FN_REFLEN + 1];
12041 
12042  tmp_name= tmp_buff;
12043  /* Scan name from the end */
12044  end= strend(path_name)-1;
12045  ptr= end;
12046  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12047  ptr--;
12048  }
12049  uint name_len= end - ptr;
12050  memcpy(tmp_name, ptr + 1, end - ptr);
12051  tmp_name[name_len]= '\0';
12052  filename_to_tablename(tmp_name, tabname, sizeof(tmp_buff) - 1);
12053 }
12054 
12059 void ha_ndbcluster::set_tabname(const char *path_name)
12060 {
12061  set_tabname(path_name, m_tabname);
12062 }
12063 
12064 
12065 /*
12066  If there are no stored stats, decide whether to do a tree-dive on
12067  all db nodes. The result is fairly accurate but costs a round-trip.
12068  */
12069 static const bool g_ndb_records_in_range_tree_dive= false;
12070 
12071 /* Determine roughly how many records are in the range specified */
12072 ha_rows
12073 ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
12074  key_range *max_key)
12075 {
12076  KEY *key_info= table->key_info + inx;
12077  uint key_length= key_info->key_length;
12078  NDB_INDEX_TYPE idx_type= get_index_type(inx);
12079 
12080  DBUG_ENTER("records_in_range");
12081  // Prevent partial read of hash indexes by returning HA_POS_ERROR
12082  if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
12083  ((min_key && min_key->length < key_length) ||
12084  (max_key && max_key->length < key_length)))
12085  DBUG_RETURN(HA_POS_ERROR);
12086 
12087  // Read from hash index with full key
12088  // This is a "const" table which returns only one record!
12089  if ((idx_type != ORDERED_INDEX) &&
12090  ((min_key && min_key->length == key_length) &&
12091  (max_key && max_key->length == key_length) &&
12092  (min_key->key==max_key->key ||
12093  memcmp(min_key->key, max_key->key, key_length)==0)))
12094  DBUG_RETURN(1);
12095 
12096  // XXX why this if
12097  if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
12098  idx_type == UNIQUE_ORDERED_INDEX ||
12099  idx_type == ORDERED_INDEX))
12100  {
12101  THD *thd= current_thd;
12102  const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
12103  THDVAR(thd, index_stat_enable);
12104 
12105  if (index_stat_enable)
12106  {
12107  ha_rows rows= HA_POS_ERROR;
12108  int err= ndb_index_stat_get_rir(inx, min_key, max_key, &rows);
12109  if (err == 0)
12110  {
12111  /*
12112  The optimizer treats row estimates below 2 as exact; we don't
12113  provide exact statistics, so never return less than 2.
12114  */
12115  if (rows < 2)
12116  rows = 2;
12117  DBUG_RETURN(rows);
12118  }
12119  if (err != 0 &&
12120  /* no stats is not unexpected error */
12121  err != NdbIndexStat::NoIndexStats &&
12122  /* warning was printed at first error */
12123  err != Ndb_index_stat_error_HAS_ERROR)
12124  {
12125  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
12126  ER_CANT_GET_STAT, /* pun? */
12127  "index stats (RIR) for key %s:"
12128  " unexpected error %d",
12129  key_info->name, err);
12130  }
12131  /*fall through*/
12132  }
12133 
12134  if (g_ndb_records_in_range_tree_dive)
12135  {
12136  NDB_INDEX_DATA& d=m_index[inx];
12137  const NDBINDEX* index= d.index;
12138  Ndb *ndb= get_ndb(thd);
12139  NdbTransaction* active_trans= m_thd_ndb ? m_thd_ndb->trans : 0;
12140  NdbTransaction* trans=NULL;
12141  int res=0;
12142  Uint64 rows;
12143 
12144  do
12145  {
12146  if ((trans=active_trans) == NULL ||
12147  trans->commitStatus() != NdbTransaction::Started)
12148  {
12149  DBUG_PRINT("info", ("no active trans"));
12150  if (! (trans=ndb->startTransaction()))
12151  ERR_BREAK(ndb->getNdbError(), res);
12152  }
12153 
12154  /* Create an IndexBound struct for the keys */
12155  NdbIndexScanOperation::IndexBound ib;
12156  compute_index_bounds(ib,
12157  key_info,
12158  min_key,
12159  max_key,
12160  0);
12161 
12162  ib.range_no= 0;
12163 
12164  NdbIndexStat is;
12165  if (is.records_in_range(index,
12166  trans,
12167  d.ndb_record_key,
12168  m_ndb_record,
12169  &ib,
12170  0,
12171  &rows,
12172  0) == -1)
12173  ERR_BREAK(is.getNdbError(), res);
12174  } while (0);
12175 
12176  if (trans != active_trans && rows == 0)
12177  rows = 1;
12178  if (trans != active_trans && trans != NULL)
12179  ndb->closeTransaction(trans);
12180  if (res == 0)
12181  DBUG_RETURN(rows);
12182  /*fall through*/
12183  }
12184  }
12185 
12186  /* Use simple heuristics to estimate fraction
12187  of 'stats.records' returned from the range.
12188  */
12189  do
12190  {
12191  if (stats.records == ~(ha_rows)0 || stats.records == 0)
12192  {
12193  /* Refresh statistics, only read from datanodes if 'use_exact_count' */
12194  THD *thd= current_thd;
12195  if (update_stats(thd, THDVAR(thd, use_exact_count)))
12196  break;
12197  }
12198 
12199  Uint64 rows;
12200  Uint64 table_rows= stats.records;
12201  size_t eq_bound_len= 0;
12202  size_t min_key_length= (min_key) ? min_key->length : 0;
12203  size_t max_key_length= (max_key) ? max_key->length : 0;
12204 
12205  // Might have a closed/open range bound:
12206  // Low range open
12207  if (!min_key_length)
12208  {
12209  rows= (!max_key_length)
12210  ? table_rows // No range was specified
12211  : table_rows/10; // -oo .. <high range> -> 10% selectivity
12212  }
12213  // High range open
12214  else if (!max_key_length)
12215  {
12216  rows= table_rows/10; // <low range>..oo -> 10% selectivity
12217  }
12218  else
12219  {
12220  size_t bounds_len= MIN(min_key_length,max_key_length);
12221  eq_bound_len= 0; // reuse the outer eq_bound_len; it is tested after this block
12222  uint eq_bound_offs= 0;
12223 
12224  KEY_PART_INFO* key_part= key_info->key_part;
12225  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
12226  for (; key_part != end; key_part++)
12227  {
12228  uint part_length= key_part->store_length;
12229  if (eq_bound_offs+part_length > bounds_len ||
12230  memcmp(&min_key->key[eq_bound_offs],
12231  &max_key->key[eq_bound_offs],
12232  part_length))
12233  {
12234  break;
12235  }
12236  eq_bound_len+= key_part->length;
12237  eq_bound_offs+= part_length;
12238  }
12239 
12240  if (!eq_bound_len)
12241  {
12242  rows= table_rows/20; // <low range>..<high range> -> 5%
12243  }
12244  else
12245  {
12246  // Has an equality range on a leading part of 'key_length':
12247  // - Assume reduced selectivity for non-unique indexes
12248  // by decreasing 'eq_fraction' by 20%
12249  // - Assume equal selectivity for all eq_parts in key.
12250 
12251  double eq_fraction = (double)(eq_bound_len) / key_length;
12252  if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity
12253  eq_fraction/= 1.20;
12254  if (eq_fraction >= 1.0) // Exact match -> 1 row
12255  DBUG_RETURN(1);
12256 
12257  rows = (Uint64)((double)table_rows / pow((double)table_rows, eq_fraction));
12258  if (rows > (table_rows/50)) // EQ-range: Max 2% of rows
12259  rows= (table_rows/50);
12260 
12261  if (min_key_length > eq_bound_offs)
12262  rows/= 2;
12263  if (max_key_length > eq_bound_offs)
12264  rows/= 2;
12265  }
12266  }
12267 
12268  // Make sure that EQ is preferred even if row-count is low
12269  if (eq_bound_len && rows < 2) // At least 2 rows as not exact
12270  rows= 2;
12271  else if (rows < 3)
12272  rows= 3;
12273  DBUG_RETURN(MIN(rows,table_rows));
12274  } while (0);
12275 
12276  DBUG_RETURN(10); /* Poor guess when you don't know anything */
12277 }
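
A worked instance of the equality-range heuristic above (numbers purely illustrative): with table_rows = 1,000,000 and an equality bound covering half of the key on a PRIMARY_KEY_ORDERED_INDEX (so the 1.20 reduction is skipped), eq_fraction = 0.5 and rows = 1,000,000 / pow(1,000,000, 0.5) = 1,000; the 2% cap (20,000) does not bite, and each bound extending past the equality prefix halves the estimate once more.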
12278 
12279 ulonglong ha_ndbcluster::table_flags(void) const
12280 {
12281  THD *thd= current_thd;
12282  ulonglong f=
12283  HA_REC_NOT_IN_SEQ |
12284  HA_NULL_IN_KEY |
12285  HA_AUTO_PART_KEY |
12286  HA_NO_PREFIX_CHAR_KEYS |
12287 #ifndef NDB_WITH_NEW_MRR_INTERFACE
12288  HA_NEED_READ_RANGE_BUFFER |
12289 #endif
12290  HA_CAN_GEOMETRY |
12291  HA_CAN_BIT_FIELD |
12292  HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
12293  HA_PRIMARY_KEY_REQUIRED_FOR_DELETE |
12294  HA_PARTIAL_COLUMN_READ |
12295  HA_HAS_OWN_BINLOGGING |
12296  HA_BINLOG_ROW_CAPABLE |
12297  HA_HAS_RECORDS |
12298 #ifndef NDB_WITHOUT_ONLINE_ALTER
12299  HA_ONLINE_ALTER |
12300 #endif
12301  0;
12302 
12303  /*
12304  To allow logging of ndb tables during statement-based logging,
12305  flag the capability, but also turn off the flag for OWN_BINLOGGING
12306  */
12307  if (thd->variables.binlog_format == BINLOG_FORMAT_STMT)
12308  f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
12309 
12310  /*
12311  To maximize join pushability we want const-table optimization
12312  blocked if 'ndb_join_pushdown= on'
12313  */
12314  if (THDVAR(thd, join_pushdown))
12315  f= f | HA_BLOCK_CONST_TABLE;
12316 
12317  return f;
12318 }
12319 
12320 const char * ha_ndbcluster::table_type() const
12321 {
12322  return("NDBCLUSTER");
12323 }
12324 uint ha_ndbcluster::max_supported_record_length() const
12325 {
12326  return NDB_MAX_TUPLE_SIZE;
12327 }
12328 uint ha_ndbcluster::max_supported_keys() const
12329 {
12330  return MAX_KEY;
12331 }
12332 uint ha_ndbcluster::max_supported_key_parts() const
12333 {
12334  return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
12335 }
12336 uint ha_ndbcluster::max_supported_key_length() const
12337 {
12338  return NDB_MAX_KEY_SIZE;
12339 }
12340 uint ha_ndbcluster::max_supported_key_part_length() const
12341 {
12342  return NDB_MAX_KEY_SIZE;
12343 }
12344 bool ha_ndbcluster::low_byte_first() const
12345 {
12346 #ifdef WORDS_BIGENDIAN
12347  return FALSE;
12348 #else
12349  return TRUE;
12350 #endif
12351 }
12352 const char* ha_ndbcluster::index_type(uint key_number)
12353 {
12354  switch (get_index_type(key_number)) {
12355  case ORDERED_INDEX:
12356  case UNIQUE_ORDERED_INDEX:
12357  case PRIMARY_KEY_ORDERED_INDEX:
12358  return "BTREE";
12359  case UNIQUE_INDEX:
12360  case PRIMARY_KEY_INDEX:
12361  default:
12362  return "HASH";
12363  }
12364 }
12365 
12366 uint8 ha_ndbcluster::table_cache_type()
12367 {
12368  DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
12369  DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
12370 }
12371 
12383 uint ndb_get_commitcount(THD *thd, char *norm_name,
12384  Uint64 *commit_count)
12385 {
12386  char dbname[NAME_LEN + 1];
12387  NDB_SHARE *share;
12388  DBUG_ENTER("ndb_get_commitcount");
12389 
12390  DBUG_PRINT("enter", ("name: %s", norm_name));
12391  pthread_mutex_lock(&ndbcluster_mutex);
12392  if (!(share=(NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
12393  (const uchar*) norm_name,
12394  strlen(norm_name))))
12395  {
12396  pthread_mutex_unlock(&ndbcluster_mutex);
12397  DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
12398  norm_name));
12399  DBUG_RETURN(1);
12400  }
12401  /* ndb_share reference temporary, free below */
12402  share->use_count++;
12403  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
12404  share->key, share->use_count));
12405  pthread_mutex_unlock(&ndbcluster_mutex);
12406 
12407  pthread_mutex_lock(&share->mutex);
12408  if (opt_ndb_cache_check_time > 0)
12409  {
12410  if (share->commit_count != 0)
12411  {
12412  *commit_count= share->commit_count;
12413 #ifndef DBUG_OFF
12414  char buff[22];
12415 #endif
12416  DBUG_PRINT("info", ("Getting commit_count: %s from share",
12417  llstr(share->commit_count, buff)));
12418  pthread_mutex_unlock(&share->mutex);
12419  /* ndb_share reference temporary free */
12420  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
12421  share->key, share->use_count));
12422  free_share(&share);
12423  DBUG_RETURN(0);
12424  }
12425  }
12426  DBUG_PRINT("info", ("Get commit_count from NDB"));
12427  Ndb *ndb;
12428  if (!(ndb= check_ndb_in_thd(thd)))
12429  DBUG_RETURN(1);
12430 
12431  ha_ndbcluster::set_dbname(norm_name, dbname);
12432  if (ndb->setDatabaseName(dbname))
12433  {
12434  ERR_RETURN(ndb->getNdbError());
12435  }
12436  uint lock= share->commit_count_lock;
12437  pthread_mutex_unlock(&share->mutex);
12438 
12439  struct Ndb_statistics stat;
12440  {
12441  char tblname[NAME_LEN + 1];
12442  ha_ndbcluster::set_tabname(norm_name, tblname);
12443  Ndb_table_guard ndbtab_g(ndb->getDictionary(), tblname);
12444  if (ndbtab_g.get_table() == 0
12445  || ndb_get_table_statistics(thd, NULL,
12446  FALSE,
12447  ndb,
12448  ndbtab_g.get_table()->getDefaultRecord(),
12449  &stat))
12450  {
12451  /* ndb_share reference temporary free */
12452  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
12453  share->key, share->use_count));
12454  free_share(&share);
12455  DBUG_RETURN(1);
12456  }
12457  }
12458 
12459  pthread_mutex_lock(&share->mutex);
12460  if (share->commit_count_lock == lock)
12461  {
12462 #ifndef DBUG_OFF
12463  char buff[22];
12464 #endif
12465  DBUG_PRINT("info", ("Setting commit_count to %s",
12466  llstr(stat.commit_count, buff)));
12467  share->commit_count= stat.commit_count;
12468  *commit_count= stat.commit_count;
12469  }
12470  else
12471  {
12472  DBUG_PRINT("info", ("Discarding commit_count, commit_count_lock changed"));
12473  *commit_count= 0;
12474  }
12475  pthread_mutex_unlock(&share->mutex);
12476  /* ndb_share reference temporary free */
12477  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
12478  share->key, share->use_count));
12479  free_share(&share);
12480  DBUG_RETURN(0);
12481 }
12482 
12483 
12512 static my_bool
12513 ndbcluster_cache_retrieval_allowed(THD *thd,
12514  char *full_name, uint full_name_len,
12515  ulonglong *engine_data)
12516 {
12517  Uint64 commit_count;
12518  char dbname[NAME_LEN + 1];
12519  char tabname[NAME_LEN + 1];
12520 #ifndef DBUG_OFF
12521  char buff[22], buff2[22];
12522 #endif
12523 
12524  ha_ndbcluster::set_dbname(full_name, dbname);
12525  ha_ndbcluster::set_tabname(full_name, tabname);
12526 
12527  DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
12528  DBUG_PRINT("enter", ("dbname: %s, tabname: %s",
12529  dbname, tabname));
12530 
12531  if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
12532  {
12533  /* Don't allow qc to be used if table has been previously
12534  modified in transaction */
12535  if (!check_ndb_in_thd(thd))
12536  DBUG_RETURN(FALSE);
12537  Thd_ndb *thd_ndb= get_thd_ndb(thd);
12538  if (!thd_ndb->changed_tables.is_empty())
12539  {
12540  NDB_SHARE* share;
12541  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
12542  while ((share= it++))
12543  {
12544  if (strcmp(share->table_name, tabname) == 0 &&
12545  strcmp(share->db, dbname) == 0)
12546  {
12547  DBUG_PRINT("exit", ("No, transaction has changed table"));
12548  DBUG_RETURN(FALSE);
12549  }
12550  }
12551  }
12552  }
12553 
12554  if (ndb_get_commitcount(thd, full_name, &commit_count))
12555  {
12556  *engine_data= 0; /* invalidate */
12557  DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
12558  DBUG_RETURN(FALSE);
12559  }
12560  DBUG_PRINT("info", ("*engine_data: %s, commit_count: %s",
12561  llstr(*engine_data, buff), llstr(commit_count, buff2)));
12562  if (commit_count == 0)
12563  {
12564  *engine_data= 0; /* invalidate */
12565  DBUG_PRINT("exit", ("No, local commit has been performed"));
12566  DBUG_RETURN(FALSE);
12567  }
12568  else if (*engine_data != commit_count)
12569  {
12570  *engine_data= commit_count; /* invalidate */
12571  DBUG_PRINT("exit", ("No, commit_count has changed"));
12572  DBUG_RETURN(FALSE);
12573  }
12574 
12575  DBUG_PRINT("exit", ("OK to use cache, engine_data: %s",
12576  llstr(*engine_data, buff)));
12577  DBUG_RETURN(TRUE);
12578 }
12579 
12580 
12602 my_bool
12603 ha_ndbcluster::register_query_cache_table(THD *thd,
12604  char *full_name, uint full_name_len,
12605  qc_engine_callback *engine_callback,
12606  ulonglong *engine_data)
12607 {
12608  Uint64 commit_count;
12609 #ifndef DBUG_OFF
12610  char buff[22];
12611 #endif
12612  DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
12613  DBUG_PRINT("enter",("dbname: %s, tabname: %s",
12614  m_dbname, m_tabname));
12615 
12616  if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
12617  {
12618  /* Don't allow qc to be used if table has been previously
12619  modified in transaction */
12620  Thd_ndb *thd_ndb= get_thd_ndb(thd);
12621  if (!thd_ndb->changed_tables.is_empty())
12622  {
12623  DBUG_ASSERT(m_share);
12624  NDB_SHARE* share;
12625  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
12626  while ((share= it++))
12627  {
12628  if (m_share == share)
12629  {
12630  DBUG_PRINT("exit", ("No, transaction has changed table"));
12631  DBUG_RETURN(FALSE);
12632  }
12633  }
12634  }
12635  }
12636 
12637  if (ndb_get_commitcount(thd, full_name, &commit_count))
12638  {
12639  *engine_data= 0;
12640  DBUG_PRINT("exit", ("Error, could not get commitcount"));
12641  DBUG_RETURN(FALSE);
12642  }
12643  *engine_data= commit_count;
12644  *engine_callback= ndbcluster_cache_retrieval_allowed;
12645  DBUG_PRINT("exit", ("commit_count: %s", llstr(commit_count, buff)));
12646  DBUG_RETURN(commit_count > 0);
12647 }
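
Taken together, the two functions above implement the query-cache handshake; a summary of the flow (restating the code, not new behaviour):

  /*
    1. store:    register_query_cache_table() saves the commit count in
                 *engine_data and registers ndbcluster_cache_retrieval_allowed
    2. retrieve: the callback re-reads the count via ndb_get_commitcount()
    3. a changed count, or a count of 0, invalidates the cached result
  */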
12648 
12649 
12659 static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
12660  my_bool not_used __attribute__((unused)))
12661 {
12662  *length= share->key_length;
12663  return (uchar*) share->key;
12664 }
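
ndbcluster_get_key() is the my_hash_get_key callback handed to my_hash_init() in ndbcluster_init() earlier in this file; a minimal sketch of the pairing:

  my_hash_init(&ndbcluster_open_tables, table_alias_charset, 32, 0, 0,
               (my_hash_get_key) ndbcluster_get_key, 0, 0);
  // lookups then hash on the returned share->key / share->key_length:
  NDB_SHARE *s= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
                                            (const uchar*) key, strlen(key));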
12665 
12666 
12667 #ifndef DBUG_OFF
12668 
12669 static void print_share(const char* where, NDB_SHARE* share)
12670 {
12671  fprintf(DBUG_FILE,
12672  "%s %s.%s: use_count: %u, commit_count: %lu\n",
12673  where, share->db, share->table_name, share->use_count,
12674  (ulong) share->commit_count);
12675  fprintf(DBUG_FILE,
12676  " - key: %s, key_length: %d\n",
12677  share->key, share->key_length);
12678 
12679  Ndb_event_data *event_data= 0;
12680  if (share->event_data)
12681  event_data= share->event_data;
12682  else if (share->op)
12683  event_data= (Ndb_event_data *) share->op->getCustomData();
12684  if (event_data)
12685  {
12686  fprintf(DBUG_FILE,
12687  " - event_data->shadow_table: %p %s.%s\n",
12688  event_data->shadow_table, event_data->shadow_table->s->db.str,
12689  event_data->shadow_table->s->table_name.str);
12690  }
12691 }
12692 
12693 
12694 static void print_ndbcluster_open_tables()
12695 {
12696  DBUG_LOCK_FILE;
12697  fprintf(DBUG_FILE, ">ndbcluster_open_tables\n");
12698  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
12699  print_share("",
12700  (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i));
12701  fprintf(DBUG_FILE, "<ndbcluster_open_tables\n");
12702  DBUG_UNLOCK_FILE;
12703 }
12704 
12705 #endif
12706 
12707 
12708 #define dbug_print_open_tables() \
12709  DBUG_EXECUTE("info", \
12710  print_ndbcluster_open_tables(););
12711 
12712 #define dbug_print_share(t, s) \
12713  DBUG_LOCK_FILE; \
12714  DBUG_EXECUTE("info", \
12715  print_share((t), (s));); \
12716  DBUG_UNLOCK_FILE;
12717 
12718 
12719 /*
12720  For some reason a share is still around, try to salvage the situation
12721  by closing all cached tables. If the share still exists, there is an
12722  error somewhere but only report this to the error log. Keep this
12723  "trailing share" but rename it since there are still references to it
12724  to avoid segmentation faults. There is a risk that the memory for
12725  this trailing share leaks.
12726 
12727  Must be called with pthread_mutex_lock(&ndbcluster_mutex) already taken.
12728 */
12729 int handle_trailing_share(THD *thd, NDB_SHARE *share)
12730 {
12731  static ulong trailing_share_id= 0;
12732  DBUG_ENTER("handle_trailing_share");
12733 
12734  /* ndb_share reference temporary, free below */
12735  ++share->use_count;
12736  if (opt_ndb_extra_logging > 9)
12737  sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12738  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
12739  share->key, share->use_count));
12740  pthread_mutex_unlock(&ndbcluster_mutex);
12741 
12742  TABLE_LIST table_list;
12743  memset(&table_list, 0, sizeof(table_list));
12744  table_list.db= share->db;
12745  table_list.alias= table_list.table_name= share->table_name;
12746  close_cached_tables(thd, &table_list, TRUE, FALSE, FALSE);
12747 
12748  pthread_mutex_lock(&ndbcluster_mutex);
12749  /* ndb_share reference temporary free */
12750  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
12751  share->key, share->use_count));
12752  if (!--share->use_count)
12753  {
12754  if (opt_ndb_extra_logging > 9)
12755  sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12756  if (opt_ndb_extra_logging)
12757  sql_print_information("NDB_SHARE: trailing share "
12758  "%s(connect_count: %u) "
12759  "released by close_cached_tables at "
12760  "connect_count: %u",
12761  share->key,
12762  share->connect_count,
12763  g_ndb_cluster_connection->get_connect_count());
12764  ndbcluster_real_free_share(&share);
12765  DBUG_RETURN(0);
12766  }
12767  if (opt_ndb_extra_logging > 9)
12768  sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12769 
12770  /*
12771  share still exists, if share has not been dropped by server
12772  release that share
12773  */
12774  if (share->state != NSS_DROPPED)
12775  {
12776  share->state= NSS_DROPPED;
12777  /* ndb_share reference create free */
12778  DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u",
12779  share->key, share->use_count));
12780  --share->use_count;
12781  if (opt_ndb_extra_logging > 9)
12782  sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12783 
12784  if (share->use_count == 0)
12785  {
12786  if (opt_ndb_extra_logging)
12787  sql_print_information("NDB_SHARE: trailing share "
12788  "%s(connect_count: %u) "
12789  "released after NSS_DROPPED check "
12790  "at connect_count: %u",
12791  share->key,
12792  share->connect_count,
12793  g_ndb_cluster_connection->get_connect_count());
12794  ndbcluster_real_free_share(&share);
12795  DBUG_RETURN(0);
12796  }
12797  }
12798 
12799  DBUG_PRINT("info", ("NDB_SHARE: %s already exists use_count=%d, op=0x%lx.",
12800  share->key, share->use_count, (long) share->op));
12801  /*
12802  Ignore table shares only opened by util thread
12803  */
12804  if (!((share->use_count == 1) && share->util_thread))
12805  {
12806 #ifdef NDB_LOG_TRAILING_SHARE_ERRORS
12807  sql_print_warning("NDB_SHARE: %s already exists use_count=%d."
12808  " Moving away for safety, but possible memleak.",
12809  share->key, share->use_count);
12810 #endif
12811  }
12812  dbug_print_open_tables();
12813 
12814  /*
12815  Ndb share has not been released as it should
12816  */
12817 #ifdef NOT_YET
12818  DBUG_ASSERT(FALSE);
12819 #endif
12820 
12821  /*
12822  This is probably an error. We can however save the situation
12823  at the cost of a possible mem leak, by "renaming" the share
12824  - First remove from hash
12825  */
12826  my_hash_delete(&ndbcluster_open_tables, (uchar*) share);
12827 
12828  /*
12829  now give it a new name, just a running number;
12830  if there is not enough space, allocate some more
12831  */
12832  {
12833  const uint min_key_length= 10;
12834  if (share->key_length < min_key_length)
12835  {
12836  share->key= (char*) alloc_root(&share->mem_root, min_key_length + 1);
12837  share->key_length= min_key_length;
12838  }
12839  share->key_length=
12840  my_snprintf(share->key, min_key_length + 1, "#leak%lu",
12841  trailing_share_id++);
12842  }
12843  /* Keep it for a possible future trailing free */
12844  my_hash_insert(&ndbcluster_open_tables, (uchar*) share);
12845 
12846  DBUG_RETURN(0);
12847 }
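
For example, a leaked share originally keyed "./testdb/t1" (key chosen for illustration) is re-inserted under "#leak0", the next one under "#leak1", and so on; later lookups under the real table name can then never collide with the stale entry, at the cost of the noted possible memory leak.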
12848 
12849 /*
12850  Rename share is used during rename table.
12851 */
12852 int ndbcluster_prepare_rename_share(NDB_SHARE *share, const char *new_key)
12853 {
12854  /*
12855  allocate and set the new key, db etc
12856  enough space for key, db, and table_name
12857  */
12858  uint new_length= (uint) strlen(new_key);
12859  share->new_key= (char*) alloc_root(&share->mem_root, 2 * (new_length + 1));
12860  strmov(share->new_key, new_key);
12861  return 0;
12862 }
12863 
12864 int ndbcluster_undo_rename_share(THD *thd, NDB_SHARE *share)
12865 {
12866  share->new_key= share->old_names;
12867  ndbcluster_rename_share(thd, share);
12868  return 0;
12869 }
12870 
12871 int ndbcluster_rename_share(THD *thd, NDB_SHARE *share)
12872 {
12873  NDB_SHARE *tmp;
12874  pthread_mutex_lock(&ndbcluster_mutex);
12875  uint new_length= (uint) strlen(share->new_key);
12876  DBUG_PRINT("ndbcluster_rename_share", ("old_key: %s old_length: %d",
12877  share->key, share->key_length));
12878  if ((tmp= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
12879  (const uchar*) share->new_key,
12880  new_length)))
12881  handle_trailing_share(thd, tmp);
12882 
12883  /* remove the share from hash */
12884  my_hash_delete(&ndbcluster_open_tables, (uchar*) share);
12885  dbug_print_open_tables();
12886 
12887  /* save old stuff if insert should fail */
12888  uint old_length= share->key_length;
12889  char *old_key= share->key;
12890 
12891  share->key= share->new_key;
12892  share->key_length= new_length;
12893 
12894  if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
12895  {
12896  // ToDo free the allocated stuff above?
12897  DBUG_PRINT("error", ("ndbcluster_rename_share: my_hash_insert %s failed",
12898  share->key));
12899  share->key= old_key;
12900  share->key_length= old_length;
12901  if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
12902  {
12903  sql_print_error("ndbcluster_rename_share: failed to recover %s", share->key);
12904  DBUG_PRINT("error", ("ndbcluster_rename_share: my_hash_insert %s failed",
12905  share->key));
12906  }
12907  dbug_print_open_tables();
12908  pthread_mutex_unlock(&ndbcluster_mutex);
12909  return -1;
12910  }
12911  dbug_print_open_tables();
12912 
12913  share->db= share->key + new_length + 1;
12914  ha_ndbcluster::set_dbname(share->new_key, share->db);
12915  share->table_name= share->db + strlen(share->db) + 1;
12916  ha_ndbcluster::set_tabname(share->new_key, share->table_name);
12917 
12918  dbug_print_share("ndbcluster_rename_share:", share);
12919  Ndb_event_data *event_data= 0;
12920  if (share->event_data)
12921  event_data= share->event_data;
12922  else if (share->op)
12923  event_data= (Ndb_event_data *) share->op->getCustomData();
12924  if (event_data && event_data->shadow_table)
12925  {
12926  if (!IS_TMP_PREFIX(share->table_name))
12927  {
12928  event_data->shadow_table->s->db.str= share->db;
12929  event_data->shadow_table->s->db.length= strlen(share->db);
12930  event_data->shadow_table->s->table_name.str= share->table_name;
12931  event_data->shadow_table->s->table_name.length= strlen(share->table_name);
12932  }
12933  else
12934  {
12935  /*
12936  We don't rename the shadow table's names here; they are still
12937  used by the injector and we don't know if all events have been
12938  processed. This table will be dropped anyway.
12939  */
12940 
12941  }
12942  }
12943  /* else rename will be handled when the ALTER event comes */
12944  share->old_names= old_key;
12945  // ToDo free old_names after ALTER EVENT
12946 
12947  if (opt_ndb_extra_logging > 9)
12948  sql_print_information ("ndbcluster_rename_share: %s-%s use_count: %u", old_key, share->key, share->use_count);
12949 
12950  pthread_mutex_unlock(&ndbcluster_mutex);
12951  return 0;
12952 }
12953 
12954 /*
12955  Increase refcount on existing share.
12956  Always returns share and cannot fail.
12957 */
12958 NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
12959 {
12960  pthread_mutex_lock(&ndbcluster_mutex);
12961  share->use_count++;
12962 
12963  dbug_print_open_tables();
12964  dbug_print_share("ndbcluster_get_share:", share);
12965  if (opt_ndb_extra_logging > 9)
12966  sql_print_information ("ndbcluster_get_share: %s use_count: %u", share->key, share->use_count);
12967  pthread_mutex_unlock(&ndbcluster_mutex);
12968  return share;
12969 }
12970 
12971 
12972 /*
12973  Get a share object for key
12974 
12975  Returns share for key, and increases the refcount on the share.
12976 
12977  create_if_not_exists == TRUE:
12978  creates share if it does not already exist
12979  returns 0 only due to out of memory, and then sets my_error
12980 
12981  create_if_not_exists == FALSE:
12982  returns 0 if share does not exist
12983 
12984  have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken
12985 */
12986 
12987 NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
12988  bool create_if_not_exists,
12989  bool have_lock)
12990 {
12991  NDB_SHARE *share;
12992  uint length= (uint) strlen(key);
12993  DBUG_ENTER("ndbcluster_get_share");
12994  DBUG_PRINT("enter", ("key: '%s'", key));
12995 
12996  if (!have_lock)
12997  pthread_mutex_lock(&ndbcluster_mutex);
12998  if (!(share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
12999  (const uchar*) key,
13000  length)))
13001  {
13002  if (!create_if_not_exists)
13003  {
13004  DBUG_PRINT("error", ("get_share: %s does not exist", key));
13005  if (!have_lock)
13006  pthread_mutex_unlock(&ndbcluster_mutex);
13007  DBUG_RETURN(0);
13008  }
13009  if ((share= (NDB_SHARE*) my_malloc(sizeof(*share),
13010  MYF(MY_WME | MY_ZEROFILL))))
13011  {
13012  MEM_ROOT **root_ptr=
13013  my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC);
13014  MEM_ROOT *old_root= *root_ptr;
13015  init_sql_alloc(&share->mem_root, 1024, 0);
13016  *root_ptr= &share->mem_root; // remember to reset before return
13017  share->flags= 0;
13018  share->state= NSS_INITIAL;
13019  /* enough space for key, db, and table_name */
13020  share->key= (char*) alloc_root(*root_ptr, 2 * (length + 1));
13021  share->key_length= length;
13022  strmov(share->key, key);
13023  if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
13024  {
13025  free_root(&share->mem_root, MYF(0));
13026  my_free((uchar*) share, 0);
13027  *root_ptr= old_root;
13028  if (!have_lock)
13029  pthread_mutex_unlock(&ndbcluster_mutex);
13030  DBUG_RETURN(0);
13031  }
13032  thr_lock_init(&share->lock);
13033  pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
13034  share->commit_count= 0;
13035  share->commit_count_lock= 0;
13036  share->db= share->key + length + 1;
13037  ha_ndbcluster::set_dbname(key, share->db);
13038  share->table_name= share->db + strlen(share->db) + 1;
13039  ha_ndbcluster::set_tabname(key, share->table_name);
13040  if (ndbcluster_binlog_init_share(current_thd, share, table))
13041  {
13042  DBUG_PRINT("error", ("get_share: %s could not init share", key));
13043  ndbcluster_real_free_share(&share);
13044  *root_ptr= old_root;
13045  if (!have_lock)
13046  pthread_mutex_unlock(&ndbcluster_mutex);
13047  DBUG_RETURN(0);
13048  }
13049  *root_ptr= old_root;
13050  }
13051  else
13052  {
13053  DBUG_PRINT("error", ("get_share: failed to alloc share"));
13054  if (!have_lock)
13055  pthread_mutex_unlock(&ndbcluster_mutex);
13056  my_error(ER_OUTOFMEMORY, MYF(0), static_cast<int>(sizeof(*share)));
13057  DBUG_RETURN(0);
13058  }
13059  }
13060  share->use_count++;
13061  if (opt_ndb_extra_logging > 9)
13062  sql_print_information ("ndbcluster_get_share: %s use_count: %u", share->key, share->use_count);
13063 
13064  dbug_print_open_tables();
13065  dbug_print_share("ndbcluster_get_share:", share);
13066  if (!have_lock)
13067  pthread_mutex_unlock(&ndbcluster_mutex);
13068  DBUG_RETURN(share);
13069 }
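
The single alloc_root of 2 * (length + 1) bytes above packs three NUL-terminated strings into one buffer; the resulting layout, sketched for an illustrative key "./testdb/t1":

  // share->key:        "./testdb/t1\0"   occupies the first length + 1 bytes
  // share->db:         "testdb\0"        starts at share->key + length + 1
  // share->table_name: "t1\0"            follows directly after share->db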
13070 
13071 
13072 void ndbcluster_real_free_share(NDB_SHARE **share)
13073 {
13074  DBUG_ENTER("ndbcluster_real_free_share");
13075  dbug_print_share("ndbcluster_real_free_share:", *share);
13076 
13077  if (opt_ndb_extra_logging > 9)
13078  sql_print_information ("ndbcluster_real_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13079 
13080  ndb_index_stat_free(*share);
13081 
13082  my_hash_delete(&ndbcluster_open_tables, (uchar*) *share);
13083  thr_lock_delete(&(*share)->lock);
13084  pthread_mutex_destroy(&(*share)->mutex);
13085 
13086 #ifdef HAVE_NDB_BINLOG
13087  if ((*share)->m_cfn_share && (*share)->m_cfn_share->m_ex_tab && g_ndb)
13088  {
13089  NDBDICT *dict= g_ndb->getDictionary();
13090  dict->removeTableGlobal(*(*share)->m_cfn_share->m_ex_tab, 0);
13091  (*share)->m_cfn_share->m_ex_tab= 0;
13092  }
13093 #endif
13094  (*share)->new_op= 0;
13095  if ((*share)->event_data)
13096  {
13097  delete (*share)->event_data;
13098  (*share)->event_data= 0;
13099  }
13100  free_root(&(*share)->mem_root, MYF(0));
13101  my_free((uchar*) *share, MYF(0));
13102  *share= 0;
13103 
13104  dbug_print_open_tables();
13105  DBUG_VOID_RETURN;
13106 }
13107 
13108 
13109 void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
13110 {
13111  if (!have_lock)
13112  pthread_mutex_lock(&ndbcluster_mutex);
13113  if (!--(*share)->use_count)
13114  {
13115  if (opt_ndb_extra_logging > 9)
13116  sql_print_information ("ndbcluster_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13117  ndbcluster_real_free_share(share);
13118  }
13119  else
13120  {
13121  if (opt_ndb_extra_logging > 9)
13122  sql_print_information ("ndbcluster_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13123  dbug_print_open_tables();
13124  dbug_print_share("ndbcluster_free_share:", *share);
13125  }
13126  if (!have_lock)
13127  pthread_mutex_unlock(&ndbcluster_mutex);
13128 }
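
The share functions above follow a plain reference-count discipline; a minimal usage sketch:

  NDB_SHARE *share= ndbcluster_get_share(key, table, TRUE, FALSE); // use_count +1
  /* ... use share ... */
  ndbcluster_free_share(&share, FALSE); // use_count -1; freed for real at zero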
13129 
13130 
13131 struct ndb_table_statistics_row {
13132  Uint64 rows;
13133  Uint64 commits;
13134  Uint32 size;
13135  Uint64 fixed_mem;
13136  Uint64 var_mem;
13137 };
13138 
13139 int ha_ndbcluster::update_stats(THD *thd,
13140  bool do_read_stat,
13141  bool have_lock,
13142  uint part_id)
13143 {
13144  struct Ndb_statistics stat;
13145  Thd_ndb *thd_ndb= get_thd_ndb(thd);
13146  DBUG_ENTER("ha_ndbcluster::update_stats");
13147  do
13148  {
13149  if (m_share && !do_read_stat)
13150  {
13151  pthread_mutex_lock(&m_share->mutex);
13152  stat= m_share->stat;
13153  pthread_mutex_unlock(&m_share->mutex);
13154 
13155  DBUG_ASSERT(stat.row_count != ~(ha_rows)0); // should never be invalid
13156 
13157  /* Accept shared cached statistics if row_count is valid. */
13158  if (stat.row_count != ~(ha_rows)0)
13159  break;
13160  }
13161 
13162  /* Request statistics from datanodes */
13163  Ndb *ndb= thd_ndb->ndb;
13164  if (ndb->setDatabaseName(m_dbname))
13165  {
13166  DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
13167  }
13168  if (int err= ndb_get_table_statistics(thd, this, TRUE, ndb,
13169  m_ndb_record, &stat,
13170  have_lock, part_id))
13171  {
13172  DBUG_RETURN(err);
13173  }
13174 
13175  /* Update shared statistics with fresh data */
13176  if (m_share)
13177  {
13178  pthread_mutex_lock(&m_share->mutex);
13179  m_share->stat= stat;
13180  pthread_mutex_unlock(&m_share->mutex);
13181  }
13182  break;
13183  }
13184  while(0);
13185 
13186  int no_uncommitted_rows_count= 0;
13187  if (m_table_info && !thd_ndb->m_error)
13188  {
13189  m_table_info->records= stat.row_count;
13190  m_table_info->last_count= thd_ndb->count;
13191  no_uncommitted_rows_count= m_table_info->no_uncommitted_rows_count;
13192  }
13193  stats.mean_rec_length= stat.row_size;
13194  stats.data_file_length= stat.fragment_memory;
13195  stats.records= stat.row_count + no_uncommitted_rows_count;
13196  stats.max_data_file_length= stat.fragment_extent_space;
13197  stats.delete_length= stat.fragment_extent_free_space;
13198 
13199  DBUG_PRINT("exit", ("stats.records: %d "
13200  "stat->row_count: %d "
13201  "no_uncommitted_rows_count: %d"
13202  "stat->fragment_extent_space: %u "
13203  "stat->fragment_extent_free_space: %u",
13204  (int)stats.records,
13205  (int)stat.row_count,
13206  (int)no_uncommitted_rows_count,
13207  (uint)stat.fragment_extent_space,
13208  (uint)stat.fragment_extent_free_space));
13209  DBUG_RETURN(0);
13210 }
13211 
13218 static
13219 void modify_shared_stats(NDB_SHARE *share,
13220  Ndb_local_table_statistics *local_stat)
13221 {
13222  if (local_stat->no_uncommitted_rows_count)
13223  {
13224  pthread_mutex_lock(&share->mutex);
13225  DBUG_ASSERT(share->stat.row_count != ~(ha_rows)0);// should never be invalid
13226  if (share->stat.row_count != ~(ha_rows)0)
13227  {
13228  DBUG_PRINT("info", ("Update row_count for %s, row_count: %lu, with:%d",
13229  share->table_name, (ulong) share->stat.row_count,
13230  local_stat->no_uncommitted_rows_count));
13231  share->stat.row_count=
13232  ((Int64)share->stat.row_count+local_stat->no_uncommitted_rows_count > 0)
13233  ? share->stat.row_count+local_stat->no_uncommitted_rows_count
13234  : 0;
13235  }
13236  pthread_mutex_unlock(&share->mutex);
13237  local_stat->no_uncommitted_rows_count= 0;
13238  }
13239 }
13240 
13241 /* If part_id contains a legal partition id, ndbstat returns the
13242  partition-statistics pertaining to that partition only.
13243  Otherwise, it returns the table-statistics,
13244  which is an aggregate over all partitions of that table.
13245  */
13246 static
13247 int
13248 ndb_get_table_statistics(THD *thd, ha_ndbcluster* file, bool report_error, Ndb* ndb,
13249  const NdbRecord *record,
13250  struct Ndb_statistics * ndbstat,
13251  bool have_lock,
13252  uint part_id)
13253 {
13254  Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
13255  NdbTransaction* pTrans;
13256  NdbError error;
13257  int retries= 100;
13258  int reterr= 0;
13259  int retry_sleep= 30; /* 30 milliseconds */
13260  const char *dummyRowPtr;
13261  NdbOperation::GetValueSpec extraGets[8];
13262  Uint64 rows, commits, fixed_mem, var_mem, ext_space, free_ext_space;
13263  Uint32 size, fragid;
13264 #ifndef DBUG_OFF
13265  char buff[22], buff2[22], buff3[22], buff4[22], buff5[22], buff6[22];
13266 #endif
13267  DBUG_ENTER("ndb_get_table_statistics");
13268 
13269  DBUG_ASSERT(record != 0);
13270 
13271  /* We use the passed-in NdbRecord just to get access to the
13272  table; we mask out any/all columns it may have and add
13273  our reads as extraGets. This is necessary as they are
13274  all pseudo-columns.
13275  */
13276  extraGets[0].column= NdbDictionary::Column::ROW_COUNT;
13277  extraGets[0].appStorage= &rows;
13278  extraGets[1].column= NdbDictionary::Column::COMMIT_COUNT;
13279  extraGets[1].appStorage= &commits;
13280  extraGets[2].column= NdbDictionary::Column::ROW_SIZE;
13281  extraGets[2].appStorage= &size;
13282  extraGets[3].column= NdbDictionary::Column::FRAGMENT_FIXED_MEMORY;
13283  extraGets[3].appStorage= &fixed_mem;
13284  extraGets[4].column= NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY;
13285  extraGets[4].appStorage= &var_mem;
13286  extraGets[5].column= NdbDictionary::Column::FRAGMENT_EXTENT_SPACE;
13287  extraGets[5].appStorage= &ext_space;
13288  extraGets[6].column= NdbDictionary::Column::FRAGMENT_FREE_EXTENT_SPACE;
13289  extraGets[6].appStorage= &free_ext_space;
13290  extraGets[7].column= NdbDictionary::Column::FRAGMENT;
13291  extraGets[7].appStorage= &fragid;
13292 
13293  const Uint32 codeWords= 1;
13294  Uint32 codeSpace[ codeWords ];
13295  NdbInterpretedCode code(NULL, // Table is irrelevant
13296  &codeSpace[0],
13297  codeWords);
13298  if ((code.interpret_exit_last_row() != 0) ||
13299  (code.finalise() != 0))
13300  {
13301  reterr= code.getNdbError().code;
13302  DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
13303  error.code, error.message));
13304  DBUG_RETURN(reterr);
13305  }
13306 
13307  do
13308  {
13309  Uint32 count= 0;
13310  Uint64 sum_rows= 0;
13311  Uint64 sum_commits= 0;
13312  Uint64 sum_row_size= 0;
13313  Uint64 sum_mem= 0;
13314  Uint64 sum_ext_space= 0;
13315  Uint64 sum_free_ext_space= 0;
13316  NdbScanOperation*pOp;
13317  int check;
13318 
13319  if ((pTrans= ndb->startTransaction()) == NULL)
13320  {
13321  error= ndb->getNdbError();
13322  goto retry;
13323  }
13324 
13325  NdbScanOperation::ScanOptions options;
13326  options.optionsPresent= NdbScanOperation::ScanOptions::SO_BATCH |
13327  NdbScanOperation::ScanOptions::SO_GETVALUE |
13328  NdbScanOperation::ScanOptions::SO_INTERPRETED;
13329  /* Set batch_size=1, as we need only one row per fragment. */
13330  options.batch= 1;
13331  options.extraGetValues= &extraGets[0];
13332  options.numExtraGetValues= sizeof(extraGets)/sizeof(extraGets[0]);
13333  options.interpretedCode= &code;
13334 
13335  if ((pOp= pTrans->scanTable(record, NdbOperation::LM_CommittedRead,
13336  empty_mask,
13337  &options,
13338  sizeof(NdbScanOperation::ScanOptions))) == NULL)
13339  {
13340  error= pTrans->getNdbError();
13341  goto retry;
13342  }
13343  thd_ndb->m_scan_count++;
13344  thd_ndb->m_pruned_scan_count += (pOp->getPruned()? 1 : 0);
13345 
13346  thd_ndb->m_execute_count++;
13347  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
13348  if (pTrans->execute(NdbTransaction::NoCommit,
13349  NdbOperation::AbortOnError,
13350  TRUE) == -1)
13351  {
13352  error= pTrans->getNdbError();
13353  goto retry;
13354  }
13355 
13356  while ((check= pOp->nextResult(&dummyRowPtr, TRUE, TRUE)) == 0)
13357  {
13358  DBUG_PRINT("info", ("nextResult rows: %d commits: %d "
13359  "fixed_mem_size %d var_mem_size %d "
13360  "fragmentid %d extent_space %d free_extent_space %d",
13361  (int)rows, (int)commits, (int)fixed_mem,
13362  (int)var_mem, (int)fragid, (int)ext_space,
13363  (int)free_ext_space));
13364 
13365  if ((part_id != ~(uint)0) && fragid != part_id)
13366  {
13367  continue;
13368  }
13369 
13370  sum_rows+= rows;
13371  sum_commits+= commits;
13372  if (sum_row_size < size)
13373  sum_row_size= size;
13374  sum_mem+= fixed_mem + var_mem;
13375  count++;
13376  sum_ext_space += ext_space;
13377  sum_free_ext_space += free_ext_space;
13378 
13379  if ((part_id != ~(uint)0) && fragid == part_id)
13380  {
13381  break;
13382  }
13383  }
13384 
13385  if (check == -1)
13386  {
13387  error= pOp->getNdbError();
13388  goto retry;
13389  }
13390 
13391  pOp->close(TRUE);
13392 
13393  ndb->closeTransaction(pTrans);
13394 
13395  ndbstat->row_count= sum_rows;
13396  ndbstat->commit_count= sum_commits;
13397  ndbstat->row_size= (ulong)sum_row_size;
13398  ndbstat->fragment_memory= sum_mem;
13399  ndbstat->fragment_extent_space= sum_ext_space;
13400  ndbstat->fragment_extent_free_space= sum_free_ext_space;
13401 
13402  DBUG_PRINT("exit", ("records: %s commits: %s "
13403  "row_size: %s mem: %s "
13404  "allocated: %s free: %s "
13405  "count: %u",
13406  llstr(sum_rows, buff),
13407  llstr(sum_commits, buff2),
13408  llstr(sum_row_size, buff3),
13409  llstr(sum_mem, buff4),
13410  llstr(sum_ext_space, buff5),
13411  llstr(sum_free_ext_space, buff6),
13412  count));
13413 
13414  DBUG_RETURN(0);
13415 retry:
13416  if(report_error)
13417  {
13418  if (file && pTrans)
13419  {
13420  reterr= file->ndb_err(pTrans, have_lock);
13421  }
13422  else
13423  {
13424  const NdbError& tmp= error;
13425  ERR_PRINT(tmp);
13426  reterr= ndb_to_mysql_error(&tmp);
13427  }
13428  }
13429  else
13430  reterr= error.code;
13431 
13432  if (pTrans)
13433  {
13434  ndb->closeTransaction(pTrans);
13435  pTrans= NULL;
13436  }
13437  if (error.status == NdbError::TemporaryError &&
13438  retries-- && !thd->killed)
13439  {
13440  do_retry_sleep(retry_sleep);
13441  continue;
13442  }
13443  break;
13444  } while(1);
13445  DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
13446  error.code, error.message));
13447  DBUG_RETURN(reterr);
13448 }
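
The one-word interpreted program above (interpret_exit_last_row) makes every fragment return exactly one row, so the scan delivers one pseudo-column sample per fragment, and batch=1 keeps the transfers minimal. On a table with 4 fragments, for example, the nextResult() loop runs 4 times, summing ROW_COUNT and COMMIT_COUNT and keeping the largest ROW_SIZE seen.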
13449 
13455 int ha_ndbcluster::write_ndb_file(const char *name)
13456 {
13457  File file;
13458  bool error=1;
13459  char path[FN_REFLEN];
13460 
13461  DBUG_ENTER("write_ndb_file");
13462  DBUG_PRINT("enter", ("name: %s", name));
13463 
13464 #ifndef EMBEDDED_LIBRARY
13465  (void)strxnmov(path, FN_REFLEN-1,
13466  mysql_data_home,"/",name,ha_ndb_ext,NullS);
13467 #else
13468  (void)strxnmov(path, FN_REFLEN-1, name,ha_ndb_ext, NullS);
13469 #endif
13470 
13471  if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
13472  {
13473  // It's an empty file
13474  error=0;
13475  my_close(file,MYF(0));
13476  }
13477  DBUG_RETURN(error);
13478 }
13479 
13480 #ifndef NDB_WITH_NEW_MRR_INTERFACE
13481 bool
13482 ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges,
13483  KEY_MULTI_RANGE *end_range,
13484  HANDLER_BUFFER *buffer)
13485 {
13486  DBUG_ENTER("null_value_index_search");
13487  KEY* key_info= table->key_info + active_index;
13488  KEY_MULTI_RANGE *range= ranges;
13489  ulong reclength= table->s->reclength;
13490  uchar *curr= (uchar*)buffer->buffer;
13491  uchar *end_of_buffer= (uchar*)buffer->buffer_end;
13492 
13493  /* All passed ranges whose results could fit into the
13494  * buffer are examined, although some may later be
13495  * marked for skipping, wasting buffer space.
13496  */
13497  assert(!(range->range_flag & SKIP_RANGE));
13498 
13499  for (; range<end_range && curr+reclength <= end_of_buffer;
13500  range++)
13501  {
13502  const uchar *key= range->start_key.key;
13503  uint key_len= range->start_key.length;
13504  if (check_null_in_key(key_info, key, key_len))
13505  DBUG_RETURN(TRUE);
13506  curr += reclength;
13507  }
13508  DBUG_RETURN(FALSE);
13509 }
13510 #endif
13511 
13512 void ha_ndbcluster::check_read_before_write_removal()
13513 {
13514  DBUG_ENTER("check_read_before_write_removal");
13515 
13516  /* Must have determined that rbwr is possible */
13517  assert(m_read_before_write_removal_possible);
13518  m_read_before_write_removal_used= true;
13519 
13520  /* Can't use on table with hidden primary key */
13521  assert(table_share->primary_key != MAX_KEY);
13522 
13523  /* Index must be unique */
13524  DBUG_PRINT("info", ("using index %d", active_index));
13525  const KEY *key= table->key_info + active_index;
13526  assert((key->flags & HA_NOSAME)); NDB_IGNORE_VALUE(key);
13527 
13528  DBUG_VOID_RETURN;
13529 }
13530 
13531 #ifndef NDB_WITH_NEW_MRR_INTERFACE
13532 /*
13533  This is used to check if an ordered index scan is needed for a range in
13534  a multi range read.
13535  If a scan is not needed, we use a faster primary/unique key operation
13536  instead.
13537 */
13538 static my_bool
13539 read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info,
13540  const KEY_MULTI_RANGE *r)
13541 {
13542  if (cur_index_type == ORDERED_INDEX)
13543  return TRUE;
13544  if (cur_index_type == PRIMARY_KEY_INDEX ||
13545  cur_index_type == UNIQUE_INDEX)
13546  return FALSE;
13547  DBUG_ASSERT(cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
13548  cur_index_type == UNIQUE_ORDERED_INDEX);
13549  if (r->start_key.length != key_info->key_length ||
13550  r->start_key.flag != HA_READ_KEY_EXACT)
13551  return TRUE; // Not exact match, need scan
13552  if (cur_index_type == UNIQUE_ORDERED_INDEX &&
13553  check_null_in_key(key_info, r->start_key.key,r->start_key.length))
13554  return TRUE; // Can't use for NULL values
13555  return FALSE;
13556 }
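
Summarising the decision above (restating the code):

  /*
    index type                          exact full-key equality?    result
    ----------------------------------  -------------------------   ----------
    ORDERED_INDEX                       any                         scan
    PRIMARY_KEY_INDEX / UNIQUE_INDEX    any                         key lookup
    PRIMARY_KEY_ORDERED_INDEX           yes                         key lookup
    UNIQUE_ORDERED_INDEX                yes, and no NULLs in key    key lookup
    otherwise                           -                           scan
  */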
13557 
13558 int
13559 ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
13560  KEY_MULTI_RANGE *ranges,
13561  uint range_count,
13562  bool sorted,
13563  HANDLER_BUFFER *buffer)
13564 {
13565  KEY* key_info= table->key_info + active_index;
13566  NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
13567  ulong reclength= table_share->reclength;
13568  NdbTransaction *trans= m_thd_ndb->trans;
13569  int error;
13570 
13571  DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
13572  DBUG_PRINT("info", ("blob fields=%d read_set=0x%x", table_share->blob_fields, table->read_set->bitmap[0]));
13573 
13578  if (uses_blob_value(table->read_set) ||
13579  (cur_index_type == UNIQUE_INDEX &&
13580  has_null_in_unique_index(active_index) &&
13581  null_value_index_search(ranges, ranges+range_count, buffer))
13582  || (m_pushed_join_operation==PUSHED_ROOT &&
13583  !m_disable_pushed_join &&
13584  !m_pushed_join_member->get_query_def().isScanQuery())
13585  || m_delete_cannot_batch || m_update_cannot_batch)
13586  {
13587  DBUG_PRINT("info", ("read_multi_range not possible, falling back to default handler implementation"));
13588  m_disable_multi_read= TRUE;
13589  DBUG_RETURN(handler::read_multi_range_first(found_range_p,
13590  ranges,
13591  range_count,
13592  sorted,
13593  buffer));
13594  }
13595 
13600  if (unlikely((error= close_scan())))
13601  DBUG_RETURN(error);
13602 
13603  m_disable_multi_read= FALSE;
13604 
13605  /*
13606  * Copy arguments into member variables
13607  */
13608  m_multi_ranges= ranges;
13609  multi_range_curr= ranges;
13610  multi_range_end= ranges+range_count;
13611  multi_range_sorted= sorted;
13612  multi_range_buffer= buffer;
13613 
13614  /*
13615  * read multi range will read ranges as follows (if not ordered)
13616  *
13617  * input read order
13618  * ====== ==========
13619  * pk-op 1 pk-op 1
13620  * pk-op 2 pk-op 2
13621  * range 3 range (3,5) NOTE result rows will be intermixed
13622  * pk-op 4 pk-op 4
13623  * range 5
13624  * pk-op 6 pk-op 6
13625  */
13626 
13627  /*
13628  We first loop over all ranges, converting into primary/unique key
13629  operations if possible, and counting ranges that require an
13630  ordered index scan. If the supplied HANDLER_BUFFER is too small, we
13631  may also need to do only part of the multi read at once.
13632 
13633  Afterwards, we create the ordered index scan cursor (if needed).
13634  */
13635 
13636  DBUG_ASSERT(cur_index_type != UNDEFINED_INDEX);
13637  DBUG_ASSERT(m_multi_cursor==NULL);
13638  DBUG_ASSERT(m_active_query==NULL);
13639 
13640  const NdbOperation* lastOp= trans ? trans->getLastDefinedOperation() : 0;
13641  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
13642  uchar *row_buf= (uchar *)buffer->buffer;
13643  const uchar *end_of_buffer= buffer->buffer_end;
13644  uint num_scan_ranges= 0;
13645  uint i;
13646  bool any_real_read= FALSE;
13647 
13648  if (m_read_before_write_removal_possible)
13649  check_read_before_write_removal();
13650  for (i= 0; i < range_count; i++)
13651  {
13652  KEY_MULTI_RANGE *r= &ranges[i];
13653 
13654  part_id_range part_spec;
13655  if (m_use_partition_pruning)
13656  {
13657  get_partition_set(table, table->record[0], active_index, &r->start_key,
13658  &part_spec);
13659  DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
13660  part_spec.start_part, part_spec.end_part));
13661  /*
13662  If partition pruning has found no partition in set
13663  we can skip this scan
13664  */
13665  if (part_spec.start_part > part_spec.end_part)
13666  {
13667  /*
13668  We can skip this partition since the key won't fit into any
13669  partition
13670  */
13671  r->range_flag|= SKIP_RANGE;
13672  row_buf += reclength;
13673  continue;
13674  }
13675  if (!trans &&
13676  (part_spec.start_part == part_spec.end_part))
13677  if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
13678  error))))
13679  DBUG_RETURN(error);
13680  }
13681  r->range_flag&= ~(uint)SKIP_RANGE;
13682 
13683  if ((m_pushed_join_operation==PUSHED_ROOT &&
13684  m_pushed_join_member->get_query_def().isScanQuery()) || // Pushed joins restricted to ordered range scan in mrr
13685  read_multi_needs_scan(cur_index_type, key_info, r))
13686  {
13687  if (!trans)
13688  {
13689  // ToDo see if we can use start_transaction_key here instead
13690  if (!m_use_partition_pruning)
13691  {
13692  get_partition_set(table, table->record[0], active_index, &r->start_key,
13693  &part_spec);
13694  if (part_spec.start_part == part_spec.end_part)
13695  {
13696  if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
13697  error))))
13698  DBUG_RETURN(error);
13699  }
13700  else if (unlikely(!(trans= start_transaction(error))))
13701  DBUG_RETURN(error);
13702  }
13703  else if (unlikely(!(trans= start_transaction(error))))
13704  DBUG_RETURN(error);
13705  }
13706 
13707  any_real_read= TRUE;
13708  DBUG_PRINT("info", ("any_real_read= TRUE"));
13709 
13710  /*
13711  If we reach the limit of ranges allowed in a single scan: stop
13712  here, send what we have so far, and continue when done with that.
13713  */
13714  if (i > NdbIndexScanOperation::MaxRangeNo)
13715  {
13716  DBUG_PRINT("info", ("Reached the limit of ranges allowed in a single "
13717  "scan"));
13718  break;
13719  }
13720 
13721 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
13722  /* Create the scan operation for the first scan range. */
13723  if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
13724  active_index,
13725  !m_active_query && sorted))
13726  {
13727  if (!m_active_query)
13728  {
13729  const int error= create_pushed_join();
13730  if (unlikely(error))
13731  DBUG_RETURN(error);
13732 
13733  NdbQuery* const query= m_active_query;
13734  if (sorted &&
13735  query->getQueryOperation((uint)PUSHED_ROOT)->setOrdering(NdbQueryOptions::ScanOrdering_ascending))
13736  ERR_RETURN(query->getNdbError());
13737  }
13738  }
13739  else
13740 #endif
13741  if (!m_multi_cursor)
13742  {
13743  if (m_pushed_join_operation == PUSHED_ROOT)
13744  {
13745  m_thd_ndb->m_pushed_queries_dropped++;
13746  }
13747  /* Do a multi-range index scan for ranges not done by primary/unique key. */
13748  NdbScanOperation::ScanOptions options;
13749  NdbInterpretedCode code(m_table);
13750 
13751  options.optionsPresent=
13752  NdbScanOperation::ScanOptions::SO_SCANFLAGS |
13753  NdbScanOperation::ScanOptions::SO_PARALLEL;
13754 
13755  options.scan_flags=
13756  NdbScanOperation::SF_ReadRangeNo |
13757  NdbScanOperation::SF_MultiRange;
13758 
13759  if (lm == NdbOperation::LM_Read)
13760  options.scan_flags|= NdbScanOperation::SF_KeyInfo;
13761  if (sorted)
13762  options.scan_flags|= NdbScanOperation::SF_OrderByFull;
13763 
13764  options.parallel= DEFAULT_PARALLELISM;
13765 
13766  NdbOperation::GetValueSpec gets[2];
13767  if (table_share->primary_key == MAX_KEY)
13768  get_hidden_fields_scan(&options, gets);
13769 
13770  if (m_cond && m_cond->generate_scan_filter(&code, &options))
13771  ERR_RETURN(code.getNdbError());
13772 
13773  /* Define scan */
13774  NdbIndexScanOperation *scanOp= trans->scanIndex
13775  (m_index[active_index].ndb_record_key,
13776  m_ndb_record,
13777  lm,
13778  (uchar *)(table->read_set->bitmap),
13779  NULL, /* All bounds specified below */
13780  &options,
13781  sizeof(NdbScanOperation::ScanOptions));
13782 
13783  if (!scanOp)
13784  ERR_RETURN(trans->getNdbError());
13785 
13786  m_multi_cursor= scanOp;
13787 
13788  /*
13789  We do not get_blob_values() here, as when using blobs we always
13790  fallback to non-batched multi range read (see if statement at
13791  top of this function).
13792  */
13793 
13794  /* We set m_next_row=0 to say that no row was fetched from the scan yet. */
13795  m_next_row= 0;
13796  }
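 /*
   Reduced to its core, the NDB API pattern set up above is (sketch;
   'trans', 'key_rec' and 'row_rec' stand in for the members used here):

     NdbScanOperation::ScanOptions options;
     options.optionsPresent= NdbScanOperation::ScanOptions::SO_SCANFLAGS;
     options.scan_flags= NdbScanOperation::SF_ReadRangeNo |
                         NdbScanOperation::SF_MultiRange;
     NdbIndexScanOperation* op=
       trans->scanIndex(key_rec, row_rec, NdbOperation::LM_Read,
                        NULL, NULL, &options, sizeof(options));

   SF_MultiRange announces that several bounds will be defined on this
   single operation, and SF_ReadRangeNo tags each returned row with the
   number of the range it belongs to, which read_multi_range_fetch_next()
   below retrieves via get_range_no().
 */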
13797 
13798  Ndb::PartitionSpec ndbPartitionSpec;
13799  const Ndb::PartitionSpec* ndbPartSpecPtr= NULL;
13800 
13801  /* If this table uses user-defined partitioning, use MySQLD provided
13802  * partition info as pruning info.
13803  * Otherwise, scan range pruning is performed automatically by
13804  * NDBAPI based on distribution key values.
13805  */
13806  if (m_use_partition_pruning &&
13807  m_user_defined_partitioning &&
13808  (part_spec.start_part == part_spec.end_part))
13809  {
13810  DBUG_PRINT("info", ("Range on user-def-partitioned table can be pruned to part %u",
13811  part_spec.start_part));
13812  ndbPartitionSpec.type= Ndb::PartitionSpec::PS_USER_DEFINED;
13813  ndbPartitionSpec.UserDefined.partitionId= part_spec.start_part;
13814  ndbPartSpecPtr= &ndbPartitionSpec;
13815  }
13816 
13817  /* Include this range in the ordered index scan. */
13818  NdbIndexScanOperation::IndexBound bound;
13819  compute_index_bounds(bound, key_info, &r->start_key, &r->end_key, 0);
13820  bound.range_no= i;
13821 
13822  const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
13823  if (m_active_query)
13824  {
13825  DBUG_PRINT("info", ("setBound:%d, for pushed join", bound.range_no));
13826  if (m_active_query->setBound(key_rec, &bound))
13827  {
13828  ERR_RETURN(trans->getNdbError());
13829  }
13830  }
13831  else
13832  {
13833  if (m_multi_cursor->setBound(key_rec,
13834  bound,
13835  ndbPartSpecPtr, // Only for user-def tables
13836  sizeof(Ndb::PartitionSpec)))
13837  {
13838  ERR_RETURN(trans->getNdbError());
13839  }
13840  }
13841 
13842  r->range_flag&= ~(uint)UNIQUE_RANGE;
13843  num_scan_ranges++;
13844  }
13845  else // if ((...PUSHED_ROOT && m_pushed_join_member->get_query_def().isScanQuery()) ||...
13846  {
13847  if (m_pushed_join_operation == PUSHED_ROOT)
13848  {
13849  m_thd_ndb->m_pushed_queries_dropped++;
13850  }
13851  if (!trans)
13852  {
13853  DBUG_ASSERT(active_index != MAX_KEY);
13854  if (unlikely(!(trans= start_transaction_key(active_index,
13855  r->start_key.key,
13856  error))))
13857  DBUG_RETURN(error);
13858  }
13859  /*
13860  Convert to primary/unique key operation.
13861 
13862  If there is not enough buffer for reading the row: stop here, send
13863  what we have so far, and continue when done with that.
13864  */
13865  if (row_buf + reclength > end_of_buffer)
13866  break;
13867 
13868  if (m_read_before_write_removal_used)
13869  {
13870  r->range_flag|= READ_KEY_FROM_RANGE;
13871  continue;
13872  }
13873  else
13874  {
13875  any_real_read= TRUE;
13876  DBUG_PRINT("info", ("m_read_before_write_removal_used == FALSE, "
13877  "any_real_read= TRUE"));
13878  }
13879  r->range_flag|= UNIQUE_RANGE;
13880 
13881  Uint32 partitionId;
13882  Uint32* ppartitionId = NULL;
13883 
13884  if (m_user_defined_partitioning &&
13885  (cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
13886  cur_index_type == PRIMARY_KEY_INDEX))
13887  {
13888  partitionId=part_spec.start_part;
13889  ppartitionId=&partitionId;
13890  }
13891 
13892  DBUG_PRINT("info", ("Generating Pk/Unique key read for range %u", i));
13893 
13894  // The 'pushable codepath' is incomplete and not expected to be
13895  // taken, as make_join_pushed() handles AT_MULTI_UNIQUE_KEY as
13896  // non-pushable.
13897  if (m_pushed_join_operation==PUSHED_ROOT &&
13898  !m_disable_pushed_join &&
13899  !m_pushed_join_member->get_query_def().isScanQuery())
13900  {
13901  DBUG_ASSERT(false); // Incomplete code, should not be executed
13902  DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
13903  const int error= pk_unique_index_read_key_pushed(active_index,
13904  r->start_key.key,
13905  ppartitionId);
13906  if (unlikely(error))
13907  DBUG_RETURN(error);
13908  }
13909  else
13910  {
13911  if (m_pushed_join_operation == PUSHED_ROOT)
13912  {
13913  DBUG_PRINT("info", ("Cannot push join due to incomplete implementation."));
13914  m_thd_ndb->m_pushed_queries_dropped++;
13915  }
13916  const NdbOperation* op;
13917  if (!(op= pk_unique_index_read_key(active_index,
13918  r->start_key.key,
13919  row_buf, lm,
13920  ppartitionId)))
13921  ERR_RETURN(trans->getNdbError());
13922  }
13923  row_buf+= reclength;
13924  }
13925  }
13926  DBUG_ASSERT(i > 0 || i == range_count); // Require progress
13927  m_multi_range_defined_end= ranges + i;
13928 
13929  buffer->end_of_used_area= row_buf;
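 /*
   Layout of the HANDLER_BUFFER at this point (sketch): every range
   converted to a PK/unique read, and every range skipped by pruning,
   owns one 'reclength'-sized slot in range order, while scan ranges
   consume no slot:

     buffer->buffer                          buffer->end_of_used_area
       | slot: range 0 | slot: range 1 | ... | slot: range k |
 */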
13930 
13931  if (m_active_query != NULL &&
13932  m_pushed_join_member->get_query_def().isScanQuery())
13933  {
13934  m_thd_ndb->m_scan_count++;
13935  if (sorted)
13936  {
13937  m_thd_ndb->m_sorted_scan_count++;
13938  }
13939 
13940  bool prunable = false;
13941  if (unlikely(m_active_query->isPrunable(prunable) != 0))
13942  ERR_RETURN(m_active_query->getNdbError());
13943  if (prunable)
13944  m_thd_ndb->m_pruned_scan_count++;
13945 
13946  DBUG_PRINT("info", ("Is MRR scan-query pruned to 1 partition? :%u", prunable));
13947  DBUG_ASSERT(!m_multi_cursor);
13948  }
13949  if (m_multi_cursor)
13950  {
13951  DBUG_PRINT("info", ("Is MRR scan pruned to 1 partition? :%u",
13952  m_multi_cursor->getPruned()));
13953  m_thd_ndb->m_scan_count++;
13954  m_thd_ndb->m_pruned_scan_count += (m_multi_cursor->getPruned()? 1 : 0);
13955  if (sorted)
13956  {
13957  m_thd_ndb->m_sorted_scan_count++;
13958  }
13959  }
13960 
13961  if (any_real_read)
13962  {
13963  /* Get pointer to first range key operation (not scans) */
13964  const NdbOperation* rangeOp= lastOp ? lastOp->next() :
13965  trans->getFirstDefinedOperation();
13966 
13967  DBUG_PRINT("info", ("Executing reads"));
13968 
13969  if (execute_no_commit_ie(m_thd_ndb, trans) == 0)
13970  {
13971  m_multi_range_result_ptr= buffer->buffer;
13972 
13973  /* We must check the result of any primary or unique key
13974  * ranges now, as these operations may be invalidated by
13975  * further execute+releaseOperations calls on this transaction by
13976  * different handler objects.
13977  */
13978  KEY_MULTI_RANGE* rangeInfo= multi_range_curr;
13979 
13980  for (;rangeInfo < m_multi_range_defined_end; rangeInfo++)
13981  {
13982  DBUG_PRINT("info", ("range flag is %u", rangeInfo->range_flag));
13983  if (rangeInfo->range_flag & SKIP_RANGE)
13984  continue;
13985 
13986  if ((rangeInfo->range_flag & UNIQUE_RANGE) &&
13987  (!(rangeInfo->range_flag & READ_KEY_FROM_RANGE)))
13988  {
13989  assert(rangeOp != NULL);
13990  if (rangeOp->getNdbError().code == 0)
13991  {
13992  /* Successful read, results are in buffer.
13993  */
13994  rangeInfo->range_flag &= ~(uint)EMPTY_RANGE;
13995 
13996  DBUG_PRINT("info", ("Unique range op has result"));
13997  }
13998  else
13999  {
14000  NdbError err= rangeOp->getNdbError();
14001 
14002  if (err.classification !=
14003  NdbError::NoDataFound)
14004  DBUG_RETURN(ndb_err(trans));
14005 
14006  DBUG_PRINT("info", ("Unique range op has no result"));
14007  /* Indicate to read_multi_range_next that this
14008  * result is empty
14009  */
14010  rangeInfo->range_flag |= EMPTY_RANGE;
14011  }
14012 
14013  /* Move to next completed operation */
14014  rangeOp= trans->getNextCompletedOperation(rangeOp);
14015  }
14016 
14017  /* For scan ranges, do nothing here */
14018  }
14019  }
14020  else
14021  ERR_RETURN(trans->getNdbError());
14022  }
14023 
14024  DBUG_RETURN(read_multi_range_next(found_range_p));
14025 }
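 /*
   A condensed sketch of how a caller drives this interface (the
   caller shown is hypothetical; in practice it is the server's
   multi-range-read layer):

     KEY_MULTI_RANGE* found;
     int res= file->read_multi_range_first(&found, ranges, n_ranges,
                                           sorted, &buffer);
     while (res == 0)
     {
       // table->record[0] holds a row belonging to range 'found'
       res= file->read_multi_range_next(&found);
     }
     // res == HA_ERR_END_OF_FILE once all ranges are exhausted
 */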
14026 
14027 int
14028 ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
14029 {
14030  DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
14031  if (m_disable_multi_read)
14032  {
14033  DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
14034  }
14035 
14036  const ulong reclength= table_share->reclength;
14037 
14038  while (multi_range_curr < m_multi_range_defined_end)
14039  {
14040  if (multi_range_curr->range_flag & SKIP_RANGE)
14041  {
14042  /* Nothing in this range, move to next one, skipping a buffer
14043  'slot'
14044  */
14045  m_multi_range_result_ptr += reclength;
14046  multi_range_curr++;
14047  }
14048  else if (multi_range_curr->range_flag & READ_KEY_FROM_RANGE)
14049  {
14050  DBUG_PRINT("info", ("using read before write removal optimisation"));
14051  KEY* key_info= table->key_info + active_index;
14052  key_restore(table->record[0], (uchar*)multi_range_curr->start_key.key,
14053  key_info, key_info->key_length);
14054  table->status= 0;
14055  multi_range_curr++;
14056  DBUG_RETURN(0);
14057  }
14058  else if (multi_range_curr->range_flag & UNIQUE_RANGE)
14059  {
14060  /*
14061  Move to next range; we can have at most one record from a unique range.
14062  */
14063  KEY_MULTI_RANGE *old_multi_range_curr= multi_range_curr;
14064  multi_range_curr= old_multi_range_curr + 1;
14065  /*
14066  Clear m_active_cursor; it is used as a flag in update_row() /
14067  delete_row() to know whether the current tuple is from a scan
14068  or pk operation.
14069  */
14070  m_active_cursor= NULL;
14071  const uchar *src_row= m_multi_range_result_ptr;
14072  m_multi_range_result_ptr= src_row + table_share->reclength;
14073 
14074  if (!(old_multi_range_curr->range_flag & EMPTY_RANGE))
14075  {
14076  *multi_range_found_p= old_multi_range_curr;
14077  memcpy(table->record[0], src_row, table_share->reclength);
14078  DBUG_RETURN(0);
14079  }
14080 
14081  /* No row found, so fall through to try the next range. */
14082  }
14083  else
14084  {
14085  /* An index scan range. */
14086  {
14087  int res;
14088  if ((res= read_multi_range_fetch_next()) != 0)
14089  DBUG_RETURN(res);
14090  }
14091  if (!m_next_row)
14092  {
14093  /*
14094  The whole scan is done, and the cursor has been closed.
14095  So nothing more for this range. Move to next.
14096  */
14097  multi_range_curr++;
14098  }
14099  else
14100  {
14101  int current_range_no= m_current_range_no;
14102  int expected_range_no;
14103  /*
14104  For a sorted index scan, we will receive rows in increasing range_no
14105  order, so we can return ranges in order, pausing when range_no
14106  indicate that the currently processed range (multi_range_curr) is
14107  done.
14108 
14109  But for unsorted scan, we may receive a high range_no from one
14110  fragment followed by a low range_no from another fragment. So we
14111  need to process all index scan ranges together.
14112  */
14113  if (!multi_range_sorted ||
14114  (expected_range_no= multi_range_curr - m_multi_ranges)
14115  == current_range_no)
14116  {
14117  *multi_range_found_p= m_multi_ranges + current_range_no;
14118  /* Copy out data from the new row. */
14119  unpack_record(table->record[0], m_next_row);
14120  table->status= 0;
14121  /*
14122  Mark that we have used this row, so we need to fetch a new
14123  one on the next call.
14124  */
14125  m_next_row= 0;
14126  /*
14127  Set m_active_cursor; it is used as a flag in update_row() /
14128  delete_row() to know whether the current tuple is from a scan or
14129  pk operation.
14130  */
14131  m_active_cursor= m_multi_cursor;
14132 
14133  DBUG_RETURN(0);
14134  }
14135  else if (current_range_no > expected_range_no)
14136  {
14137  /* Nothing more in scan for this range. Move to next. */
14138  multi_range_curr++;
14139  }
14140  else
14141  {
14142  /*
14143  Should not happen. Ranges should be returned from NDB API in
14144  the order we requested them.
14145  */
14146  DBUG_ASSERT(0);
14147  multi_range_curr++; // Attempt to carry on
14148  }
14149  }
14150  }
14151  }
14152 
14153  if (multi_range_curr == multi_range_end)
14154  {
14155  DBUG_RETURN(HA_ERR_END_OF_FILE);
14156  }
14157 
14158  /*
14159  Read remaining ranges
14160  */
14161  DBUG_RETURN(read_multi_range_first(multi_range_found_p,
14162  multi_range_curr,
14163  multi_range_end - multi_range_curr,
14164  multi_range_sorted,
14165  multi_range_buffer));
14166 }
14167 
14168 /*
14169  Fetch next row from the ordered index cursor in multi range scan.
14170 
14171  We keep the next row in m_next_row, and the range_no of the
14172  next row in m_current_range_no. This is used in sorted index scan
14173  to correctly interleave rows from primary/unique key operations with
14174  rows from the scan.
14175 */
14176 int
14177 ha_ndbcluster::read_multi_range_fetch_next()
14178 {
14179  DBUG_ENTER("read_multi_range_fetch_next");
14180 
14181  if (m_active_query)
14182  {
14183  DBUG_PRINT("info", ("read_multi_range_fetch_next from pushed join, m_next_row:%p", m_next_row));
14184  if (!m_next_row)
14185  {
14186  int res= fetch_next_pushed();
14187  if (res == NdbQuery::NextResult_gotRow)
14188  {
14189  m_current_range_no= 0;
14190 // m_current_range_no= cursor->get_range_no(); // FIXME SPJ, need rangeNo from index scan
14191  }
14192  else if (res == NdbQuery::NextResult_scanComplete)
14193  {
14194  /* We have fetched the last row from the scan. */
14195  m_active_query->close(FALSE);
14196  m_active_query= 0;
14197  m_next_row= 0;
14198  DBUG_RETURN(0);
14199  }
14200  else
14201  {
14202  /* An error. */
14203  DBUG_RETURN(res);
14204  }
14205  }
14206  }
14207  else if (m_multi_cursor)
14208  {
14209  if (!m_next_row)
14210  {
14211  NdbIndexScanOperation *cursor= (NdbIndexScanOperation *)m_multi_cursor;
14212  int res= fetch_next(cursor);
14213  if (res == 0)
14214  {
14215  m_current_range_no= cursor->get_range_no();
14216  }
14217  else if (res == 1)
14218  {
14219  /* We have fetched the last row from the scan. */
14220  cursor->close(FALSE, TRUE);
14221  m_active_cursor= 0;
14222  m_multi_cursor= 0;
14223  m_next_row= 0;
14224  DBUG_RETURN(0);
14225  }
14226  else
14227  {
14228  /* An error. */
14229  DBUG_RETURN(res);
14230  }
14231  }
14232  }
14233  DBUG_RETURN(0);
14234 }
14235 #endif
14236 
14237 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
14238 
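 /*
   Try to push down parts of a join plan: each table access in the
   plan is tried as the root of a pushed join, and any produced
   definition is assigned to the ha_ndbcluster instance of its root.
 */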
14246 static
14247 int ndbcluster_make_pushed_join(handlerton *hton,
14248  THD* thd,
14249  const AQP::Join_plan* plan)
14250 {
14251  DBUG_ENTER("ndbcluster_make_pushed_join");
14252  (void)ha_ndb_ext; // prevents compiler warning.
14253 
14254  if (THDVAR(thd, join_pushdown))
14255  {
14256  ndb_pushed_builder_ctx pushed_builder(*plan);
14257 
14258  for (uint i= 0; i < plan->get_access_count()-1; i++)
14259  {
14260  const AQP::Table_access* const join_root= plan->get_table_access(i);
14261  const ndb_pushed_join* pushed_join= NULL;
14262 
14263  // Try to build a ndb_pushed_join starting from 'join_root'
14264  int error= pushed_builder.make_pushed_join(join_root, pushed_join);
14265  if (unlikely(error))
14266  {
14267  if (error < 0) // getNdbError() gives us the error code
14268  {
14269  ERR_SET(pushed_builder.getNdbError(),error);
14270  }
14271  join_root->get_table()->file->print_error(error, MYF(0));
14272  DBUG_RETURN(error);
14273  }
14274 
14275  // Assign any produced pushed_join definitions to
14276  // the ha_ndbcluster instance representing its root.
14277  if (pushed_join != NULL)
14278  {
14279  ha_ndbcluster* const handler=
14280  static_cast<ha_ndbcluster*>(join_root->get_table()->file);
14281 
14282  error= handler->assign_pushed_join(pushed_join);
14283  if (unlikely(error))
14284  {
14285  delete pushed_join;
14286  handler->print_error(error, MYF(0));
14287  DBUG_RETURN(error);
14288  }
14289  }
14290  }
14291  }
14292  DBUG_RETURN(0);
14293 }
14294 #endif
14295 
14296 
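 /*
   Assign a produced pushed-join definition to every ha_ndbcluster
   instance taking part in it; operation 0 (PUSHED_ROOT) is the root.
 */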
14303 int
14304 ha_ndbcluster::assign_pushed_join(const ndb_pushed_join* pushed_join)
14305 {
14306  DBUG_ENTER("assign_pushed_join");
14307  m_thd_ndb->m_pushed_queries_defined++;
14308 
14309  for (uint i = 0; i < pushed_join->get_operation_count(); i++)
14310  {
14311  const TABLE* const tab= pushed_join->get_table(i);
14312  DBUG_ASSERT(tab->file->ht == ht);
14313  ha_ndbcluster* child= static_cast<ha_ndbcluster*>(tab->file);
14314  child->m_pushed_join_member= pushed_join;
14315  child->m_pushed_join_operation= i;
14316  }
14317 
14318  DBUG_PRINT("info", ("Assigned pushed join with %d child operations",
14319  pushed_join->get_operation_count()-1));
14320 
14321  DBUG_RETURN(0);
14322 }
14323 
14324 
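 /*
   Check whether this table access can take part in a pushed join at
   all; when it cannot, 'reason' is set to a printable explanation.
 */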
14332 bool
14333 ha_ndbcluster::maybe_pushable_join(const char*& reason) const
14334 {
14335  reason= "";
14336  if (uses_blob_value(table->read_set))
14337  {
14338  reason= "select list can't contain BLOB columns";
14339  return false;
14340  }
14341  if (m_user_defined_partitioning)
14342  {
14343  reason= "has user defined partitioning";
14344  return false;
14345  }
14346 
14347  // Pushed operations may not set locks.
14348  const NdbOperation::LockMode lockMode= get_ndb_lock_mode(m_lock.type);
14349  switch (lockMode)
14350  {
14351  case NdbOperation::LM_CommittedRead:
14352  return true;
14353 
14354  case NdbOperation::LM_Read:
14355  case NdbOperation::LM_Exclusive:
14356  reason= "lock modes other than 'read committed' not implemented";
14357  return false;
14358 
14359  default: // Other lock modes not used by handler.
14360  assert(false);
14361  return false;
14362  }
14363 
14364  return true;
14365 }
14366 
14381 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
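 /*
   Check whether the pushed join prepared for this handler matches the
   access about to be executed (operation type, index, ordering). If
   not, the access silently falls back to its unpushed form.
 */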
14382 bool
14383 ha_ndbcluster::check_if_pushable(int type, //NdbQueryOperationDef::Type,
14384  uint idx,
14385  bool needSorted) const
14386 {
14387  if (m_disable_pushed_join)
14388  {
14389  DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
14390  return false;
14391  }
14392  return m_pushed_join_operation == PUSHED_ROOT
14393  && m_pushed_join_member != NULL
14394  && m_pushed_join_member->match_definition(
14395  type,
14396  (idx<MAX_KEY) ? &m_index[idx] : NULL,
14397  needSorted);
14398 }
14399 
14400 int
14401 ha_ndbcluster::create_pushed_join(const NdbQueryParamValue* keyFieldParams, uint paramCnt)
14402 {
14403  DBUG_ENTER("create_pushed_join");
14404  DBUG_ASSERT(m_pushed_join_member && m_pushed_join_operation == PUSHED_ROOT);
14405 
14406  NdbQuery* const query=
14407  m_pushed_join_member->make_query_instance(m_thd_ndb->trans, keyFieldParams, paramCnt);
14408 
14409  if (unlikely(query==NULL))
14410  ERR_RETURN(m_thd_ndb->trans->getNdbError());
14411 
14412  // Bind to instantiated NdbQueryOperations.
14413  for (uint i= 0; i < m_pushed_join_member->get_operation_count(); i++)
14414  {
14415  const TABLE* const tab= m_pushed_join_member->get_table(i);
14416  ha_ndbcluster* handler= static_cast<ha_ndbcluster*>(tab->file);
14417 
14418  DBUG_ASSERT(handler->m_pushed_join_operation==(int)i);
14419  NdbQueryOperation* const op= query->getQueryOperation(i);
14420  handler->m_pushed_operation= op;
14421 
14422  // Bind to result buffers
14423  const NdbRecord* const resultRec= handler->m_ndb_record;
14424  int res= op->setResultRowRef(
14425  resultRec,
14426  handler->_m_next_row,
14427  (uchar *)(tab->read_set->bitmap));
14428  if (unlikely(res))
14429  ERR_RETURN(query->getNdbError());
14430 
14431  // We clear 'm_next_row' to say that no row was fetched from the query yet.
14432  handler->_m_next_row= 0;
14433  }
14434 
14435  DBUG_ASSERT(m_active_query==NULL);
14436  m_active_query= query;
14437  m_thd_ndb->m_pushed_queries_executed++;
14438 
14439  DBUG_RETURN(0);
14440 }
14441 #endif
14442 
14443 
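 /*
   Check whether this table access executes as part of a pushed join,
   i.e. whether the root of its pushed join has an active NdbQuery.
 */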
14448 bool
14449 ha_ndbcluster::check_is_pushed() const
14450 {
14451  if (m_pushed_join_member == NULL)
14452  return false;
14453 
14454  handler *root= m_pushed_join_member->get_table(PUSHED_ROOT)->file;
14455  return (static_cast<ha_ndbcluster*>(root)->m_active_query);
14456 }
14457 
14458 uint
14459 ha_ndbcluster::number_of_pushed_joins() const
14460 {
14461  if (m_pushed_join_member == NULL)
14462  return 0;
14463  else
14464  return m_pushed_join_member->get_operation_count();
14465 }
14466 
14467 const TABLE*
14468 ha_ndbcluster::root_of_pushed_join() const
14469 {
14470  if (m_pushed_join_member == NULL)
14471  return NULL;
14472  else
14473  return m_pushed_join_member->get_table(PUSHED_ROOT);
14474 }
14475 
14476 const TABLE*
14477 ha_ndbcluster::parent_of_pushed_join() const
14478 {
14479  if (m_pushed_join_operation > PUSHED_ROOT)
14480  {
14481  DBUG_ASSERT(m_pushed_join_member!=NULL);
14482  uint parent_ix= m_pushed_join_member
14483  ->get_query_def().getQueryOperation(m_pushed_join_operation)
14484  ->getParentOperation(0)
14485  ->getQueryOperationIx();
14486  return m_pushed_join_member->get_table(parent_ix);
14487  }
14488  return NULL;
14489 }
14490 
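 /*
   Append the NDB replica count to the user-defined table comment;
   the result is what e.g. SHOW TABLE STATUS displays.
 */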
14497 char*
14498 ha_ndbcluster::update_table_comment(
14499  /* out: table comment + additional */
14500  const char* comment)/* in: table comment defined by user */
14501 {
14502  THD *thd= current_thd;
14503  uint length= strlen(comment);
14504  if (length > 64000 - 3)
14505  {
14506  return((char*)comment); /* string too long */
14507  }
14508 
14509  Ndb* ndb;
14510  if (!(ndb= get_ndb(thd)))
14511  {
14512  return((char*)comment);
14513  }
14514 
14515  if (ndb->setDatabaseName(m_dbname))
14516  {
14517  return((char*)comment);
14518  }
14519  const NDBTAB* tab= m_table;
14520  DBUG_ASSERT(tab != NULL);
14521 
14522  char *str;
14523  const char *fmt="%s%snumber_of_replicas: %d";
14524  const unsigned fmt_len_plus_extra= length + strlen(fmt);
14525  if ((str= (char*) my_malloc(fmt_len_plus_extra, MYF(0))) == NULL)
14526  {
14527  sql_print_error("ha_ndbcluster::update_table_comment: "
14528  "my_malloc(%u) failed", (unsigned int)fmt_len_plus_extra);
14529  return (char*)comment;
14530  }
14531 
14532  my_snprintf(str,fmt_len_plus_extra,fmt,comment,
14533  length > 0 ? " ":"",
14534  tab->getReplicaCount());
14535  return str;
14536 }
14537 
14538 
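 /*
   The NDB utility thread: waits for the server and the cluster to
   start, repeatedly gives the binlog component a chance to set itself
   up, and periodically refreshes the cached commit counts that the
   query cache check relies on.
 */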
14542 pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused)))
14543 {
14544  THD *thd; /* needs to be first for thread_stack */
14545  struct timespec abstime;
14546  Thd_ndb *thd_ndb= NULL;
14547  uint share_list_size= 0;
14548  NDB_SHARE **share_list= NULL;
14549 
14550  my_thread_init();
14551  DBUG_ENTER("ndb_util_thread");
14552  DBUG_PRINT("enter", ("cache_check_time: %lu", opt_ndb_cache_check_time));
14553 
14554  pthread_mutex_lock(&LOCK_ndb_util_thread);
14555 
14556  thd= new THD; /* note that constructor of THD uses DBUG_ */
14557  if (thd == NULL)
14558  {
14559  my_errno= HA_ERR_OUT_OF_MEM;
14560  DBUG_RETURN(NULL);
14561  }
14562  THD_CHECK_SENTRY(thd);
14563  pthread_detach_this_thread();
14564  ndb_util_thread= pthread_self();
14565 
14566  thd->thread_stack= (char*)&thd; /* remember where our stack is */
14567  if (thd->store_globals())
14568  goto ndb_util_thread_fail;
14569  lex_start(thd);
14570  thd->init_for_queries();
14571  thd_set_command(thd, COM_DAEMON);
14572 #ifndef NDB_THD_HAS_NO_VERSION
14573  thd->version=refresh_version;
14574 #endif
14575  thd->client_capabilities = 0;
14576  thd->security_ctx->skip_grants();
14577  my_net_init(&thd->net, 0);
14578 
14579  CHARSET_INFO *charset_connection;
14580  charset_connection= get_charset_by_csname("utf8",
14581  MY_CS_PRIMARY, MYF(MY_WME));
14582  thd->variables.character_set_client= charset_connection;
14583  thd->variables.character_set_results= charset_connection;
14584  thd->variables.collation_connection= charset_connection;
14585  thd->update_charset();
14586 
14587  /* Signal successful initialization */
14588  ndb_util_thread_running= 1;
14589  pthread_cond_signal(&COND_ndb_util_ready);
14590  pthread_mutex_unlock(&LOCK_ndb_util_thread);
14591 
14592  /*
14593  wait for mysql server to start
14594  */
14595  mysql_mutex_lock(&LOCK_server_started);
14596  while (!mysqld_server_started)
14597  {
14598  set_timespec(abstime, 1);
14599  mysql_cond_timedwait(&COND_server_started, &LOCK_server_started,
14600  &abstime);
14601  if (ndbcluster_terminating)
14602  {
14603  mysql_mutex_unlock(&LOCK_server_started);
14604  pthread_mutex_lock(&LOCK_ndb_util_thread);
14605  goto ndb_util_thread_end;
14606  }
14607  }
14608  mysql_mutex_unlock(&LOCK_server_started);
14609 
14610  /*
14611  Wait for cluster to start
14612  */
14613  pthread_mutex_lock(&LOCK_ndb_util_thread);
14614  while (!g_ndb_status.cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
14615  {
14616  /* ndb not connected yet */
14617  pthread_cond_wait(&COND_ndb_util_thread, &LOCK_ndb_util_thread);
14618  if (ndbcluster_terminating)
14619  goto ndb_util_thread_end;
14620  }
14621  pthread_mutex_unlock(&LOCK_ndb_util_thread);
14622 
14623  /* Get thd_ndb for this thread */
14624  if (!(thd_ndb= Thd_ndb::seize(thd)))
14625  {
14626  sql_print_error("Could not allocate Thd_ndb object");
14627  pthread_mutex_lock(&LOCK_ndb_util_thread);
14628  goto ndb_util_thread_end;
14629  }
14630  thd_set_thd_ndb(thd, thd_ndb);
14631  thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
14632 
14633  if (opt_ndb_extra_logging && ndb_binlog_running)
14634  sql_print_information("NDB Binlog: Ndb tables initially read only.");
14635 
14636  set_timespec(abstime, 0);
14637  for (;;)
14638  {
14639  pthread_mutex_lock(&LOCK_ndb_util_thread);
14640  if (!ndbcluster_terminating)
14641  pthread_cond_timedwait(&COND_ndb_util_thread,
14642  &LOCK_ndb_util_thread,
14643  &abstime);
14644  if (ndbcluster_terminating) /* Shutting down server */
14645  goto ndb_util_thread_end;
14646  pthread_mutex_unlock(&LOCK_ndb_util_thread);
14647 #ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
14648  DBUG_PRINT("ndb_util_thread", ("Started, cache_check_time: %lu",
14649  opt_ndb_cache_check_time));
14650 #endif
14651 
14652  /*
14653  Check if the Ndb object in thd_ndb is still valid (it will be
14654  invalid if connection to cluster has been lost) and recycle
14655  it if necessary.
14656  */
14657  if (!check_ndb_in_thd(thd, false))
14658  {
14659  set_timespec(abstime, 1);
14660  continue;
14661  }
14662 
14663  /*
14664  Regularly give the ndb_binlog component a chance to set itself up,
14665  i.e. at first start it needs to create the ndb_* system tables
14666  and setup event operations on those. In case of lost connection
14667  to cluster, the ndb_* system tables are hopefully still there
14668  but the event operations need to be recreated.
14669  */
14670  if (!ndb_binlog_setup(thd))
14671  {
14672  /* Failed to setup binlog, try again in 1 second */
14673  set_timespec(abstime, 1);
14674  continue;
14675  }
14676 
14677  if (opt_ndb_cache_check_time == 0)
14678  {
14679  /* Wake up in 1 second to check if value has changed */
14680  set_timespec(abstime, 1);
14681  continue;
14682  }
14683 
14684  /* Lock mutex and fill list with pointers to all open tables */
14685  NDB_SHARE *share;
14686  pthread_mutex_lock(&ndbcluster_mutex);
14687  uint i, open_count, record_count= ndbcluster_open_tables.records;
14688  if (share_list_size < record_count)
14689  {
14690  NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count];
14691  if (!new_share_list)
14692  {
14693  sql_print_warning("ndb util thread: malloc failure, "
14694  "query cache not maintained properly");
14695  pthread_mutex_unlock(&ndbcluster_mutex);
14696  goto next; // At least do not crash
14697  }
14698  delete [] share_list;
14699  share_list_size= record_count;
14700  share_list= new_share_list;
14701  }
14702  for (i= 0, open_count= 0; i < record_count; i++)
14703  {
14704  share= (NDB_SHARE *)my_hash_element(&ndbcluster_open_tables, i);
14705  if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
14706  <= 0)
14707  continue; // injector thread is the only user, skip statistics
14708  /* ndb_share reference temporary, free below */
14709  share->use_count++; /* Make sure the table can't be closed */
14710  share->util_thread= true;
14711  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
14712  share->key, share->use_count));
14713  DBUG_PRINT("ndb_util_thread",
14714  ("Found open table[%d]: %s, use_count: %d",
14715  i, share->table_name, share->use_count));
14716 
14717  /* Store pointer to table */
14718  share_list[open_count++]= share;
14719  }
14720  pthread_mutex_unlock(&ndbcluster_mutex);
14721 
14722  /* Iterate through the open files list */
14723  for (i= 0; i < open_count; i++)
14724  {
14725  share= share_list[i];
14726  if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
14727  <= 1)
14728  {
14729  /*
14730  Util thread and injector thread are the only users, skip statistics
14731  */
14732  /* ndb_share reference temporary free */
14733  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
14734  share->key, share->use_count));
14735 
14736  pthread_mutex_lock(&ndbcluster_mutex);
14737  share->util_thread= false;
14738  free_share(&share, true);
14739  pthread_mutex_unlock(&ndbcluster_mutex);
14740  continue;
14741  }
14742  DBUG_PRINT("ndb_util_thread",
14743  ("Fetching commit count for: %s", share->key));
14744 
14745  struct Ndb_statistics stat;
14746  uint lock;
14747  pthread_mutex_lock(&share->mutex);
14748  lock= share->commit_count_lock;
14749  pthread_mutex_unlock(&share->mutex);
14750  {
14751  /* Contact NDB to get commit count for table */
14752  Ndb* ndb= thd_ndb->ndb;
14753  if (ndb->setDatabaseName(share->db))
14754  {
14755  goto loop_next;
14756  }
14757  Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
14758  if (ndbtab_g.get_table() &&
14759  ndb_get_table_statistics(thd, NULL, FALSE, ndb,
14760  ndbtab_g.get_table()->getDefaultRecord(),
14761  &stat) == 0)
14762  {
14763 #ifndef DBUG_OFF
14764  char buff[22], buff2[22];
14765 #endif
14766  DBUG_PRINT("info",
14767  ("Table: %s commit_count: %s rows: %s",
14768  share->key,
14769  llstr(stat.commit_count, buff),
14770  llstr(stat.row_count, buff2)));
14771  }
14772  else
14773  {
14774  DBUG_PRINT("ndb_util_thread",
14775  ("Error: Could not get commit count for table %s",
14776  share->key));
14777  stat.commit_count= 0;
14778  }
14779  }
14780  loop_next:
14781  pthread_mutex_lock(&share->mutex);
14782  if (share->commit_count_lock == lock)
14783  share->commit_count= stat.commit_count;
14784  pthread_mutex_unlock(&share->mutex);
14785 
14786  /* ndb_share reference temporary free */
14787  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
14788  share->key, share->use_count));
14789  pthread_mutex_lock(&ndbcluster_mutex);
14790  share->util_thread= false;
14791  free_share(&share, true);
14792  pthread_mutex_unlock(&ndbcluster_mutex);
14793  }
14794 next:
14795  /* Calculate new time to wake up */
14796  set_timespec_nsec(abstime, opt_ndb_cache_check_time * 1000000ULL);
14797  }
14798 
14799  pthread_mutex_lock(&LOCK_ndb_util_thread);
14800 
14801 ndb_util_thread_end:
14802  net_end(&thd->net);
14803 ndb_util_thread_fail:
14804  if (share_list)
14805  delete [] share_list;
14806  if (thd_ndb)
14807  {
14808  Thd_ndb::release(thd_ndb);
14809  thd_set_thd_ndb(thd, NULL);
14810  }
14811  thd->cleanup();
14812  delete thd;
14813 
14814  /* signal termination */
14815  ndb_util_thread_running= 0;
14816  pthread_cond_signal(&COND_ndb_util_ready);
14817  pthread_mutex_unlock(&LOCK_ndb_util_thread);
14818  DBUG_PRINT("exit", ("ndb_util_thread"));
14819 
14820  DBUG_LEAVE; // Must match DBUG_ENTER()
14821  my_thread_end();
14822  pthread_exit(0);
14823  return NULL; // Avoid compiler warnings
14824 }
14825 
14826 /*
14827  Condition pushdown
14828 */
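 /*
   cond_push() lets the server offer a WHERE condition to the engine.
   Conditions accepted here are compiled into an NdbInterpretedCode
   scan filter and evaluated in the data nodes, so non-matching rows
   never leave the cluster. The returned Item* is whatever remains for
   the server to evaluate itself; returning 'cond' unchanged means
   nothing was pushed. For example (sketch, 'my_udf' standing in for
   an arbitrary non-pushable function):

     SELECT * FROM t WHERE a > 10 AND my_udf(b)

   may push 'a > 10' while 'my_udf(b)' is returned as the remainder.
 */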
14847 const
14848 Item*
14849 ha_ndbcluster::cond_push(const Item *cond)
14850 {
14851  DBUG_ENTER("ha_ndbcluster::cond_push");
14852 
14853 #if 1
14854  if (cond->used_tables() & ~table->map)
14855  {
14856  /*
14857  'cond' refers to fields from other tables, or other instances of
14858  this table; reject it. (The optimizer needs a better
14859  understanding of what each handler can push down.)
14860  */
14861 
14862  DBUG_EXECUTE("where",print_where((Item *)cond, "Rejected cond_push", QT_ORDINARY););
14863  DBUG_RETURN(cond);
14864  }
14865 #else
14866  /*
14867  Make sure that 'cond' does not refer field(s) from other tables
14868  or other instances of this table.
14869  (This was a legacy bug in optimizer)
14870  */
14871  DBUG_ASSERT(!(cond->used_tables() & ~table->map));
14872 #endif
14873  if (!m_cond)
14874  m_cond= new ha_ndbcluster_cond;
14875  if (!m_cond)
14876  {
14877  my_errno= HA_ERR_OUT_OF_MEM;
14878  DBUG_RETURN(cond);
14879  }
14880  DBUG_EXECUTE("where",print_where((Item *)cond, m_tabname, QT_ORDINARY););
14881  DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table));
14882 }
14883 
14887 void
14888 ha_ndbcluster::cond_pop()
14889 {
14890  if (m_cond)
14891  m_cond->cond_pop();
14892 }
14893 
14894 
14895 /*
14896  Implements the SHOW NDB STATUS command.
14897 */
14898 bool
14899 ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print,
14900  enum ha_stat_type stat_type)
14901 {
14902  char name[16];
14903  char buf[IO_SIZE];
14904  uint buflen;
14905  DBUG_ENTER("ndbcluster_show_status");
14906 
14907  if (stat_type != HA_ENGINE_STATUS)
14908  {
14909  DBUG_RETURN(FALSE);
14910  }
14911 
14912  Ndb* ndb= check_ndb_in_thd(thd);
14913  Thd_ndb *thd_ndb= get_thd_ndb(thd);
14914  struct st_ndb_status ns;
14915  if (ndb)
14916  update_status_variables(thd_ndb, &ns, thd_ndb->connection);
14917  else
14918  update_status_variables(NULL, &ns, g_ndb_cluster_connection);
14919 
14920  buflen=
14921  my_snprintf(buf, sizeof(buf),
14922  "cluster_node_id=%ld, "
14923  "connected_host=%s, "
14924  "connected_port=%ld, "
14925  "number_of_data_nodes=%ld, "
14926  "number_of_ready_data_nodes=%ld, "
14927  "connect_count=%ld",
14928  ns.cluster_node_id,
14929  ns.connected_host,
14930  ns.connected_port,
14931  ns.number_of_data_nodes,
14932  ns.number_of_ready_data_nodes,
14933  ns.connect_count);
14934  if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14935  STRING_WITH_LEN("connection"), buf, buflen))
14936  DBUG_RETURN(TRUE);
14937 
14938  for (int i= 0; i < MAX_NDB_NODES; i++)
14939  {
14940  if (ns.transaction_hint_count[i] > 0 ||
14941  ns.transaction_no_hint_count[i] > 0)
14942  {
14943  uint namelen= my_snprintf(name, sizeof(name), "node[%d]", i);
14944  buflen= my_snprintf(buf, sizeof(buf),
14945  "transaction_hint=%ld, transaction_no_hint=%ld",
14946  ns.transaction_hint_count[i],
14947  ns.transaction_no_hint_count[i]);
14948  if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14949  name, namelen, buf, buflen))
14950  DBUG_RETURN(TRUE);
14951  }
14952  }
14953 
14954  if (ndb)
14955  {
14956  Ndb::Free_list_usage tmp;
14957  tmp.m_name= 0;
14958  while (ndb->get_free_list_usage(&tmp))
14959  {
14960  buflen=
14961  my_snprintf(buf, sizeof(buf),
14962  "created=%u, free=%u, sizeof=%u",
14963  tmp.m_created, tmp.m_free, tmp.m_sizeof);
14964  if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14965  tmp.m_name, strlen(tmp.m_name), buf, buflen))
14966  DBUG_RETURN(TRUE);
14967  }
14968  }
14969  ndbcluster_show_status_binlog(thd, stat_print, stat_type);
14970 
14971  DBUG_RETURN(FALSE);
14972 }
14973 
14974 
14975 int ha_ndbcluster::get_default_no_partitions(HA_CREATE_INFO *create_info)
14976 {
14977  if (unlikely(g_ndb_cluster_connection->get_no_ready() <= 0))
14978  {
14979 err:
14980  my_error(HA_ERR_NO_CONNECTION, MYF(0));
14981  return -1;
14982  }
14983 
14984  THD* thd = current_thd;
14985  if (thd == 0)
14986  goto err;
14987  Thd_ndb * thd_ndb = get_thd_ndb(thd);
14988  if (thd_ndb == 0)
14989  goto err;
14990 
14991  ha_rows max_rows, min_rows;
14992  if (create_info)
14993  {
14994  max_rows= create_info->max_rows;
14995  min_rows= create_info->min_rows;
14996  }
14997  else
14998  {
14999  max_rows= table_share->max_rows;
15000  min_rows= table_share->min_rows;
15001  }
15002  uint no_fragments= get_no_fragments(max_rows >= min_rows ?
15003  max_rows : min_rows);
15004  uint reported_frags;
15005  adjusted_frag_count(thd_ndb->ndb,
15006  no_fragments,
15007  reported_frags);
15008  return reported_frags;
15009 }
15010 
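 /*
   Compute the NDB distribution hash over the given key fields via
   Ndb::computeHash(), which applies the same transformations as the
   data nodes; the resulting value determines which partition, and
   thereby which node, owns the row.
 */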
15011 uint32 ha_ndbcluster::calculate_key_hash_value(Field **field_array)
15012 {
15013  Uint32 hash_value;
15014  struct Ndb::Key_part_ptr key_data[MAX_REF_PARTS];
15015  struct Ndb::Key_part_ptr *key_data_ptr= &key_data[0];
15016  Uint32 i= 0;
15017  int ret_val;
15018  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
15019  void *buf= (void*)&tmp[0];
15020  Ndb *ndb= m_thd_ndb->ndb;
15021  DBUG_ENTER("ha_ndbcluster::calculate_key_hash_value");
15022 
15023  do
15024  {
15025  Field *field= *field_array;
15026  uint len= field->data_length();
15027  DBUG_ASSERT(!field->is_real_null());
15028  if (field->real_type() == MYSQL_TYPE_VARCHAR)
15029  len+= ((Field_varstring*)field)->length_bytes;
15030  key_data[i].ptr= field->ptr;
15031  key_data[i++].len= len;
15032  } while (*(++field_array));
15033  key_data[i].ptr= 0;
15034  if ((ret_val= ndb->computeHash(&hash_value, m_table,
15035  key_data_ptr, buf, sizeof(tmp))))
15036  {
15037  DBUG_PRINT("info", ("ret_val = %d", ret_val));
15038  DBUG_ASSERT(FALSE);
15039  abort();
15040  }
15041  DBUG_RETURN(hash_value);
15042 }
15043 
15044 
15045 /*
15046  Set-up auto-partitioning for NDB Cluster
15047 
15048  SYNOPSIS
15049  set_auto_partitions()
15050  part_info Partition info struct to set-up
15051 
15052  RETURN VALUE
15053  NONE
15054 
15055  DESCRIPTION
15056  Set-up auto partitioning scheme for tables that didn't define any
15057  partitioning. We'll use PARTITION BY KEY() in this case which
15058  translates into partition by primary key if a primary key exists
15059  and partition by hidden key otherwise.
15060 */
15061 
15062 enum ndb_distribution_enum {
15063  NDB_DISTRIBUTION_KEYHASH= 0,
15064  NDB_DISTRIBUTION_LINHASH= 1
15065 };
15066 static const char* distribution_names[]= { "KEYHASH", "LINHASH", NullS };
15067 static ulong opt_ndb_distribution;
15068 static TYPELIB distribution_typelib= {
15069  array_elements(distribution_names) - 1,
15070  "",
15071  distribution_names,
15072  NULL
15073 };
15074 static MYSQL_SYSVAR_ENUM(
15075  distribution, /* name */
15076  opt_ndb_distribution, /* var */
15077  PLUGIN_VAR_RQCMDARG,
15078  "Default distribution for new tables in ndb",
15079  NULL, /* check func. */
15080  NULL, /* update func. */
15081  NDB_DISTRIBUTION_KEYHASH, /* default */
15082  &distribution_typelib /* typelib */
15083 );
15084 
15085 
15086 void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
15087 {
15088  DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
15089  part_info->list_of_part_fields= TRUE;
15090  part_info->part_type= HASH_PARTITION;
15091  switch (opt_ndb_distribution)
15092  {
15093  case NDB_DISTRIBUTION_KEYHASH:
15094  part_info->linear_hash_ind= FALSE;
15095  break;
15096  case NDB_DISTRIBUTION_LINHASH:
15097  part_info->linear_hash_ind= TRUE;
15098  break;
15099  default:
15100  DBUG_ASSERT(false);
15101  break;
15102  }
15103  DBUG_VOID_RETURN;
15104 }
15105 
15106 
15107 int
15108 ha_ndbcluster::set_range_data(const partition_info *part_info,
15109  NdbDictionary::Table& ndbtab) const
15110 {
15111  const uint num_parts = partition_info_num_parts(part_info);
15112  int error= 0;
15113  bool unsigned_flag= part_info->part_expr->unsigned_flag;
15114  DBUG_ENTER("set_range_data");
15115 
15116  int32 *range_data= (int32*)my_malloc(num_parts*sizeof(int32), MYF(0));
15117  if (!range_data)
15118  {
15119  mem_alloc_error(num_parts*sizeof(int32));
15120  DBUG_RETURN(1);
15121  }
15122  for (uint i= 0; i < num_parts; i++)
15123  {
15124  longlong range_val= part_info->range_int_array[i];
15125  if (unsigned_flag)
15126  range_val-= 0x8000000000000000ULL;
15127  if (range_val < INT_MIN32 || range_val >= INT_MAX32)
15128  {
15129  if ((i != num_parts - 1) ||
15130  (range_val != LONGLONG_MAX))
15131  {
15132  my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
15133  error= 1;
15134  goto error;
15135  }
15136  range_val= INT_MAX32;
15137  }
15138  range_data[i]= (int32)range_val;
15139  }
15140  ndbtab.setRangeListData(range_data, num_parts);
15141 error:
15142  my_free((char*)range_data, MYF(0));
15143  DBUG_RETURN(error);
15144 }
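 /*
   Worked example (sketch): PARTITION BY RANGE (a) with partitions
   VALUES LESS THAN (10), (20), (MAXVALUE) yields
   range_data[]= { 10, 20, INT_MAX32 }; an unsigned partition
   expression is first rebased by subtracting 2^63 to fit the signed
   int32 encoding used above.
 */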
15145 
15146 
15147 int
15148 ha_ndbcluster::set_list_data(const partition_info *part_info,
15149  NdbDictionary::Table& ndbtab) const
15150 {
15151  const uint num_list_values = partition_info_num_list_values(part_info);
15152  int32 *list_data= (int32*)my_malloc(num_list_values*2*sizeof(int32), MYF(0));
15153  int error= 0;
15154  bool unsigned_flag= part_info->part_expr->unsigned_flag;
15155  DBUG_ENTER("set_list_data");
15156 
15157  if (!list_data)
15158  {
15159  mem_alloc_error(num_list_values*2*sizeof(int32));
15160  DBUG_RETURN(1);
15161  }
15162  for (uint i= 0; i < num_list_values; i++)
15163  {
15164  LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
15165  longlong list_val= list_entry->list_value;
15166  if (unsigned_flag)
15167  list_val-= 0x8000000000000000ULL;
15168  if (list_val < INT_MIN32 || list_val > INT_MAX32)
15169  {
15170  my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
15171  error= 1;
15172  goto error;
15173  }
15174  list_data[2*i]= (int32)list_val;
15175  list_data[2*i+1]= list_entry->partition_id;
15176  }
15177  ndbtab.setRangeListData(list_data, 2*num_list_values);
15178 error:
15179  my_free((char*)list_data, MYF(0));
15180  DBUG_RETURN(error);
15181 }
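 /*
   Worked example (sketch): PARTITION BY LIST (a) with p0 VALUES IN
   (1,3) and p1 VALUES IN (2) yields interleaved (value, partition_id)
   pairs sorted by value, e.g. list_data[]= { 1,0, 2,1, 3,0 }; hence
   the 2*num_list_values entries passed to setRangeListData() above.
 */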
15182 
15183 /*
15184  User defined partitioning set-up. We need to check how many fragments the
15185  user wants defined and which node groups to put those into. Later we also
15186  want to attach those partitions to a tablespace.
15187 
15188  All the functionality of the partition function, partition limits and so
15189  forth are entirely handled by the MySQL Server. There is one exception to
15190  this rule for PARTITION BY KEY where NDB handles the hash function and
15191  this type can thus be handled transparently also by NDB API programs.
15192  For RANGE, HASH, LIST and subpartitioning, NDB API programs must
15193  implement the function to map to a partition.
15194 */
15195 
15196 int
15197 ha_ndbcluster::set_up_partition_info(partition_info *part_info,
15198  NdbDictionary::Table& ndbtab) const
15199 {
15200  uint32 frag_data[MAX_PARTITIONS];
15201  char *ts_names[MAX_PARTITIONS];
15202  ulong fd_index= 0, i, j;
15203  NDBTAB::FragmentType ftype= NDBTAB::UserDefined;
15204  partition_element *part_elem;
15205  List_iterator<partition_element> part_it(part_info->partitions);
15206  int error;
15207  DBUG_ENTER("ha_ndbcluster::set_up_partition_info");
15208 
15209  if (part_info->part_type == HASH_PARTITION &&
15210  part_info->list_of_part_fields == TRUE)
15211  {
15212  Field **fields= part_info->part_field_array;
15213 
15214  ftype= NDBTAB::HashMapPartition;
15215 
15216  for (i= 0; i < part_info->part_field_list.elements; i++)
15217  {
15218  NDBCOL *col= ndbtab.getColumn(fields[i]->field_index);
15219  DBUG_PRINT("info",("setting dist key on %s", col->getName()));
15220  col->setPartitionKey(TRUE);
15221  }
15222  }
15223  else
15224  {
15225  if (!current_thd->variables.new_mode)
15226  {
15227  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
15228  ER_ILLEGAL_HA_CREATE_OPTION,
15229  ER(ER_ILLEGAL_HA_CREATE_OPTION),
15230  ndbcluster_hton_name,
15231  "LIST, RANGE and HASH partition disabled by default,"
15232  " use --new option to enable");
15233  DBUG_RETURN(HA_ERR_UNSUPPORTED);
15234  }
15235  /*
15236  Create a shadow field for those tables that have user defined
15237  partitioning. This field stores the value of the partition
15238  function such that NDB can handle reorganisations of the data
15239  even when the MySQL Server isn't available to assist with
15240  calculation of the partition function value.
15241  */
15242  NDBCOL col;
15243  DBUG_PRINT("info", ("Generating partition func value field"));
15244  col.setName("$PART_FUNC_VALUE");
15245  col.setType(NdbDictionary::Column::Int);
15246  col.setLength(1);
15247  col.setNullable(FALSE);
15248  col.setPrimaryKey(FALSE);
15249  col.setAutoIncrement(FALSE);
15250  ndbtab.addColumn(col);
15251  if (part_info->part_type == RANGE_PARTITION)
15252  {
15253  if ((error= set_range_data(part_info, ndbtab)))
15254  {
15255  DBUG_RETURN(error);
15256  }
15257  }
15258  else if (part_info->part_type == LIST_PARTITION)
15259  {
15260  if ((error= set_list_data(part_info, ndbtab)))
15261  {
15262  DBUG_RETURN(error);
15263  }
15264  }
15265  }
15266  ndbtab.setFragmentType(ftype);
15267  i= 0;
15268  do
15269  {
15270  uint ng;
15271  part_elem= part_it++;
15272  if (!part_info->is_sub_partitioned())
15273  {
15274  ng= part_elem->nodegroup_id;
15275  ts_names[fd_index]= part_elem->tablespace_name;
15276  frag_data[fd_index++]= ng;
15277  }
15278  else
15279  {
15280  List_iterator<partition_element> sub_it(part_elem->subpartitions);
15281  j= 0;
15282  do
15283  {
15284  part_elem= sub_it++;
15285  ng= part_elem->nodegroup_id;
15286  ts_names[fd_index]= part_elem->tablespace_name;
15287  frag_data[fd_index++]= ng;
15288  } while (++j < partition_info_num_subparts(part_info));
15289  }
15290  } while (++i < partition_info_num_parts(part_info));
15291 
15292  const bool use_default_num_parts =
15293  partition_info_use_default_num_partitions(part_info);
15294  ndbtab.setDefaultNoPartitionsFlag(use_default_num_parts);
15295  ndbtab.setLinearFlag(part_info->linear_hash_ind);
15296  {
15297  ha_rows max_rows= table_share->max_rows;
15298  ha_rows min_rows= table_share->min_rows;
15299  if (max_rows < min_rows)
15300  max_rows= min_rows;
15301  if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */
15302  {
15303  ndbtab.setMaxRows(max_rows);
15304  ndbtab.setMinRows(min_rows);
15305  }
15306  }
15307  ndbtab.setFragmentCount(fd_index);
15308  ndbtab.setFragmentData(frag_data, fd_index);
15309  DBUG_RETURN(0);
15310 }
15311 
15312 #ifndef NDB_WITHOUT_ONLINE_ALTER
15313 static
15314 HA_ALTER_FLAGS supported_alter_operations()
15315 {
15316  HA_ALTER_FLAGS alter_flags;
15317  return alter_flags |
15318  HA_ADD_INDEX |
15319  HA_DROP_INDEX |
15320  HA_ADD_UNIQUE_INDEX |
15321  HA_DROP_UNIQUE_INDEX |
15322  HA_ADD_COLUMN |
15323  HA_COLUMN_STORAGE |
15324  HA_COLUMN_FORMAT |
15325  HA_ADD_PARTITION |
15326  HA_ALTER_TABLE_REORG |
15327  HA_CHANGE_AUTOINCREMENT_VALUE;
15328 }
15329 
15330 int ha_ndbcluster::check_if_supported_alter(TABLE *altered_table,
15331  HA_CREATE_INFO *create_info,
15332  Alter_info *alter_info,
15333  HA_ALTER_FLAGS *alter_flags,
15334  uint table_changes)
15335 {
15336  THD *thd= current_thd;
15337  HA_ALTER_FLAGS not_supported= ~(supported_alter_operations());
15338  uint i;
15339  const NDBTAB *tab= (const NDBTAB *) m_table;
15340  HA_ALTER_FLAGS add_column;
15341  HA_ALTER_FLAGS adding;
15342  HA_ALTER_FLAGS dropping;
15343 
15344  DBUG_ENTER("ha_ndbcluster::check_if_supported_alter");
15345  add_column= add_column | HA_ADD_COLUMN;
15346  adding= adding | HA_ADD_INDEX | HA_ADD_UNIQUE_INDEX;
15347  dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;
15348  partition_info *part_info= altered_table->part_info;
15349  const NDBTAB *old_tab= m_table;
15350 
15351  if (THDVAR(thd, use_copying_alter_table))
15352  {
15353  DBUG_PRINT("info", ("On-line alter table disabled"));
15354  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15355  }
15356 #ifndef DBUG_OFF
15357  {
15358  char dbug_string[HA_MAX_ALTER_FLAGS+1];
15359  alter_flags->print(dbug_string);
15360  DBUG_PRINT("info", ("Not supported %s", dbug_string));
15361  }
15362 #endif
15363 
15364  if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15365  {
15366  /*
15367  sql_partition.cc tries to compute what is going on
15368  and sets flags...that we clear
15369  */
15370  if (part_info->use_default_num_partitions)
15371  {
15372  alter_flags->clear_bit(HA_COALESCE_PARTITION);
15373  alter_flags->clear_bit(HA_ADD_PARTITION);
15374  }
15375  }
15376 
15377  if ((*alter_flags & not_supported).is_set())
15378  {
15379 #ifndef DBUG_OFF
15380  HA_ALTER_FLAGS tmp = *alter_flags;
15381  tmp&= not_supported;
15382  char dbug_string[HA_MAX_ALTER_FLAGS+1];
15383  tmp.print(dbug_string);
15384  DBUG_PRINT("info", ("Detected unsupported change: %s", dbug_string));
15385 #endif
15386  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15387  }
15388 
15389  if (alter_flags->is_set(HA_ADD_COLUMN) ||
15390  alter_flags->is_set(HA_ADD_PARTITION) ||
15391  alter_flags->is_set(HA_ALTER_TABLE_REORG))
15392  {
15393  Ndb *ndb= get_ndb(thd);
15394  NDBDICT *dict= ndb->getDictionary();
15395  ndb->setDatabaseName(m_dbname);
15396  NdbDictionary::Table new_tab= *old_tab;
15397 
15398  if (alter_flags->is_set(HA_ADD_COLUMN))
15399  {
15400  NDBCOL col;
15401 
15402  /*
15403  Check that we are only adding columns
15404  */
15405  /*
15406  HA_COLUMN_STORAGE & HA_COLUMN_FORMAT are set if they are
15407  specified in a later cmd, even if they imply no change.
15408  This is probably a bug. Conclusion: add them to the add_column
15409  mask, so that we silently "accept" them. If someone tries to
15410  change a column, HA_CHANGE_COLUMN would be set, which we don't
15411  support, so we will still return HA_ALTER_NOT_SUPPORTED there.
15412  */
15413  add_column.set_bit(HA_COLUMN_STORAGE);
15414  add_column.set_bit(HA_COLUMN_FORMAT);
15415  if ((*alter_flags & ~add_column).is_set())
15416  {
15417  DBUG_PRINT("info", ("Only ADD COLUMN, with no other changes, can be performed on-line"));
15418  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15419  }
15420  /*
15421  Check for extra fields for hidden primary key
15422  or user defined partitioning
15423  */
15424  if (table_share->primary_key == MAX_KEY ||
15425  part_info->part_type != HASH_PARTITION ||
15426  !part_info->list_of_part_fields)
15427  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15428 
15429  /* Find the new fields */
15430  for (uint i= table->s->fields; i < altered_table->s->fields; i++)
15431  {
15432  Field *field= altered_table->field[i];
15433  DBUG_PRINT("info", ("Found new field %s", field->field_name));
15434  DBUG_PRINT("info", ("storage_type %i, column_format %i",
15435  (uint) field->field_storage_type(),
15436  (uint) field->column_format()));
15437  /* Create new field to check if it can be added */
15438  if ((my_errno= create_ndb_column(0, col, field, create_info,
15439  COLUMN_FORMAT_TYPE_DYNAMIC)))
15440  {
15441  DBUG_PRINT("info", ("create_ndb_column returned %u", my_errno));
15442  DBUG_RETURN(my_errno);
15443  }
15444  new_tab.addColumn(col);
15445  }
15446  }
15447 
15448  if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15449  {
15450  new_tab.setFragmentCount(0);
15451  new_tab.setFragmentData(0, 0);
15452  }
15453  else if (alter_flags->is_set(HA_ADD_PARTITION))
15454  {
15455  DBUG_PRINT("info", ("Adding partition (%u)", part_info->num_parts));
15456  new_tab.setFragmentCount(part_info->num_parts);
15457  }
15458 
15459  NDB_Modifiers table_modifiers(ndb_table_modifiers);
15460  table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
15461  create_info->comment.length);
15462  const NDB_Modifier* mod_nologging = table_modifiers.get("NOLOGGING");
15463 
15464  if (mod_nologging->m_found)
15465  {
15466  new_tab.setLogging(!mod_nologging->m_val_bool);
15467  }
15468 
15469  if (dict->supportedAlterTable(*old_tab, new_tab))
15470  {
15471  DBUG_PRINT("info", ("Adding column(s) supported on-line"));
15472  }
15473  else
15474  {
15475  DBUG_PRINT("info",("Adding column not supported on-line"));
15476  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15477  }
15478  }
15479 
15480  /*
15481  Check that we are not adding multiple indexes
15482  */
15483  if ((*alter_flags & adding).is_set())
15484  {
15485  if (((altered_table->s->keys - table->s->keys) != 1) ||
15486  (*alter_flags & dropping).is_set())
15487  {
15488  DBUG_PRINT("info",("Only one index can be added on-line"));
15489  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15490  }
15491  }
15492 
15493  /*
15494  Check that we are not dropping multiple indexes
15495  */
15496  if ((*alter_flags & dropping).is_set())
15497  {
15498  if (((table->s->keys - altered_table->s->keys) != 1) ||
15499  (*alter_flags & adding).is_set())
15500  {
15501  DBUG_PRINT("info",("Only one index can be dropped on-line"));
15502  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15503  }
15504  }
15505 
15506  for (i= 0; i < table->s->fields; i++)
15507  {
15508  Field *field= table->field[i];
15509  const NDBCOL *col= tab->getColumn(i);
15510 
15511  NDBCOL new_col;
15512  create_ndb_column(0, new_col, field, create_info);
15513 
15514  bool index_on_column = false;
15515  /*
15516  Check whether the field is part of an index, either one that is
15517  already on the table or one that is being added by this ALTER;
15518  indexed columns must stay in memory storage.
15519  */
15520 
15521  for (uint j= 0; j<table->s->keys; j++)
15522  {
15523  KEY* key_info= table->key_info + j;
15524  KEY_PART_INFO* key_part= key_info->key_part;
15525  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
15526  for (; key_part != end; key_part++)
15527  {
15528  if (key_part->field->field_index == i)
15529  {
15530  index_on_column= true;
15531  j= table->s->keys; // break outer loop
15532  break;
15533  }
15534  }
15535  }
15536 
15537  if (index_on_column == false && (*alter_flags & adding).is_set())
15538  {
15539  for (uint j= table->s->keys; j<altered_table->s->keys; j++)
15540  {
15541  KEY* key_info= altered_table->key_info + j;
15542  KEY_PART_INFO* key_part= key_info->key_part;
15543  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
15544  for (; key_part != end; key_part++)
15545  {
15546  if (key_part->field->field_index == i)
15547  {
15548  index_on_column= true;
15549  j= altered_table->s->keys; // break outer loop
15550  break;
15551  }
15552  }
15553  }
15554  }
15555 
15555  /*
15556  Determine the storage type the column will have after the ALTER.
15557  Columns that are, or become, part of an index must reside in
15558  memory. A column declared with HA_SM_DEFAULT keeps its current
15559  storage type. Any remaining change of storage media cannot be
15560  performed on-line.
15561  */
15562 
15563  if (index_on_column)
15564  {
15565  if (field->field_storage_type() == HA_SM_DISK)
15566  {
15567  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15568  }
15569  new_col.setStorageType(NdbDictionary::Column::StorageTypeMemory);
15570  }
15571  else if (field->field_storage_type() == HA_SM_DEFAULT)
15572  {
15573  /*
15574  Keep the storage type of the existing column; the server default
15575  may differ from what the column was created with.
15576  */
15577  new_col.setStorageType(col->getStorageType());
15578  }
15579 
15580  if (col->getStorageType() != new_col.getStorageType())
15581  {
15582  DBUG_PRINT("info", ("Column storage media is changed"));
15583  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15584  }
15585 
15586  if (field->flags & FIELD_IS_RENAMED)
15587  {
15588  DBUG_PRINT("info", ("Field has been renamed, copy table"));
15589  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15590  }
15591 
15592  if ((field->flags & FIELD_IN_ADD_INDEX) &&
15593  (col->getStorageType() == NdbDictionary::Column::StorageTypeDisk))
15594  {
15595  DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
15596  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15597  }
15598  }
15599 
15600  if ((*alter_flags & HA_CHANGE_AUTOINCREMENT_VALUE).is_set())
15601  {
15602  /* Check that only auto_increment value was changed */
15603  HA_ALTER_FLAGS change_auto_flags;
15604  change_auto_flags= change_auto_flags | HA_CHANGE_AUTOINCREMENT_VALUE;
15605  if ((*alter_flags & ~change_auto_flags).is_set())
15606  {
15607  DBUG_PRINT("info", ("Not only auto_increment value changed"));
15608  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15609  }
15610  }
15611  else
15612  {
15613  /* Check that row format didn't change */
15614  if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
15615  get_row_type() != create_info->row_type)
15616  {
15617  DBUG_PRINT("info", ("Row format changed"));
15618  DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15619  }
15620  }
15621 
15622  DBUG_PRINT("info", ("Ndb supports ALTER on-line"));
15623  DBUG_RETURN(HA_ALTER_SUPPORTED_WAIT_LOCK);
15624 }
15625 
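The checks above allow the online path only when exactly one index is added or exactly one is dropped, never a mix (the real code detects a mix via the adding/dropping flag sets rather than key counts). A minimal standalone sketch of that rule, using plain key counts instead of the server's HA_ALTER_FLAGS bitmap; function and variable names here are illustrative, not from the server:

    #include <cassert>

    // Hypothetical stand-in for the one-index-at-a-time rule enforced above:
    // given key counts before and after the ALTER, allow the online path only
    // for exactly one added index or exactly one dropped index.
    static bool online_index_change_supported(unsigned keys_before,
                                              unsigned keys_after)
    {
      if (keys_after > keys_before)                 // adding
        return keys_after - keys_before == 1;
      if (keys_before > keys_after)                 // dropping
        return keys_before - keys_after == 1;
      return true;                                  // no index count change
    }

    int main()
    {
      assert(online_index_change_supported(2, 3));  // single ADD INDEX: online
      assert(!online_index_change_supported(2, 4)); // two at once: copy table
      assert(online_index_change_supported(3, 2));  // single DROP INDEX: online
      return 0;
    }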
15626 int ha_ndbcluster::alter_table_phase1(THD *thd,
15627  TABLE *altered_table,
15628  HA_CREATE_INFO *create_info,
15629  HA_ALTER_INFO *alter_info,
15630  HA_ALTER_FLAGS *alter_flags)
15631 {
15632  int error= 0;
15633  uint i;
15634  Thd_ndb *thd_ndb= get_thd_ndb(thd);
15635  Ndb *ndb= get_ndb(thd);
15636  NDBDICT *dict= ndb->getDictionary();
15637  ndb->setDatabaseName(m_dbname);
15638  NDB_ALTER_DATA *alter_data;
15639  const NDBTAB *old_tab;
15640  NdbDictionary::Table *new_tab;
15641  HA_ALTER_FLAGS adding;
15642  HA_ALTER_FLAGS dropping;
15643 
15644  DBUG_ENTER("alter_table_phase1");
15645  adding= adding | HA_ADD_INDEX | HA_ADD_UNIQUE_INDEX;
15646  dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;
15647 
15648  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase1"))
15649  DBUG_RETURN(HA_ERR_NO_CONNECTION);
15650 
15651  if (!(alter_data= new NDB_ALTER_DATA(dict, m_table)))
15652  DBUG_RETURN(HA_ERR_OUT_OF_MEM);
15653  old_tab= alter_data->old_table;
15654  new_tab= alter_data->new_table;
15655  alter_info->data= alter_data;
15656 #ifndef DBUG_OFF
15657  {
15658  char dbug_string[HA_MAX_ALTER_FLAGS+1];
15659  alter_flags->print(dbug_string);
15660  DBUG_PRINT("info", ("altered_table %s, alter_flags %s",
15661  altered_table->s->table_name.str,
15662  (char *) dbug_string));
15663  }
15664 #endif
15665 
15666  prepare_for_alter();
15667 
15668  if (dict->beginSchemaTrans() == -1)
15669  {
15670  DBUG_PRINT("info", ("Failed to start schema transaction"));
15671  ERR_PRINT(dict->getNdbError());
15672  error= ndb_to_mysql_error(&dict->getNdbError());
15673  table->file->print_error(error, MYF(0));
15674  goto err;
15675  }
15676 
15677  if ((*alter_flags & adding).is_set())
15678  {
15679  KEY *key_info;
15680  KEY *key;
15681  uint *idx_p;
15682  uint *idx_end_p;
15683  KEY_PART_INFO *key_part;
15684  KEY_PART_INFO *part_end;
15685  DBUG_PRINT("info", ("Adding indexes"));
15686  key_info= (KEY*) thd->alloc(sizeof(KEY) * alter_info->index_add_count);
15687  key= key_info;
15688  for (idx_p= alter_info->index_add_buffer,
15689  idx_end_p= idx_p + alter_info->index_add_count;
15690  idx_p < idx_end_p;
15691  idx_p++, key++)
15692  {
15693  /* Copy the KEY struct. */
15694  *key= alter_info->key_info_buffer[*idx_p];
15695  /* Fix the key parts. */
15696  part_end= key->key_part + key->user_defined_key_parts;
15697  for (key_part= key->key_part; key_part < part_end; key_part++)
15698  key_part->field= table->field[key_part->fieldnr];
15699  }
15700  if ((error= add_index_impl(thd, altered_table, key_info,
15701  alter_info->index_add_count)))
15702  {
15703  /*
15704  Exchange the key_info for the error message. If we exchange
15705  key number by key name in the message later, we need correct info.
15706  */
15707  KEY *save_key_info= table->key_info;
15708  table->key_info= key_info;
15709  table->file->print_error(error, MYF(0));
15710  table->key_info= save_key_info;
15711  goto abort;
15712  }
15713  }
15714 
15715  if ((*alter_flags & dropping).is_set())
15716  {
15717  uint *key_numbers;
15718  uint *keyno_p;
15719  uint *idx_p;
15720  uint *idx_end_p;
15721  DBUG_PRINT("info", ("Renumbering indexes"));
15722  /* The prepare_drop_index() method takes an array of key numbers. */
15723  key_numbers= (uint*) thd->alloc(sizeof(uint) * alter_info->index_drop_count);
15724  keyno_p= key_numbers;
15725  /* Get the number of each key. */
15726  for (idx_p= alter_info->index_drop_buffer,
15727  idx_end_p= idx_p + alter_info->index_drop_count;
15728  idx_p < idx_end_p;
15729  idx_p++, keyno_p++)
15730  *keyno_p= *idx_p;
15731  /*
15732  Tell the handler to prepare for drop indexes.
15733  This re-numbers the indexes to get rid of gaps.
15734  */
15735  if ((error= prepare_drop_index(table, key_numbers,
15736  alter_info->index_drop_count)))
15737  {
15738  table->file->print_error(error, MYF(0));
15739  goto abort;
15740  }
15741  }
15742 
15743  if (alter_flags->is_set(HA_ADD_COLUMN))
15744  {
15745  NDBCOL col;
15746 
15747  /* Find the new fields */
15748  for (i= table->s->fields; i < altered_table->s->fields; i++)
15749  {
15750  Field *field= altered_table->field[i];
15751  DBUG_PRINT("info", ("Found new field %s", field->field_name));
15752  if ((my_errno= create_ndb_column(thd, col, field, create_info,
15753  COLUMN_FORMAT_TYPE_DYNAMIC)))
15754  {
15755  error= my_errno;
15756  goto abort;
15757  }
15758  /*
15759  If the user has not specified the field format
15760  make it dynamic to enable on-line add attribute
15761  */
15762  if (field->column_format() == COLUMN_FORMAT_TYPE_DEFAULT &&
15763  create_info->row_type == ROW_TYPE_DEFAULT &&
15764  col.getDynamic())
15765  {
15766  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
15767  ER_ILLEGAL_HA_CREATE_OPTION,
15768  "Converted FIXED field to DYNAMIC "
15769  "to enable on-line ADD COLUMN",
15770  field->field_name);
15771  }
15772  new_tab->addColumn(col);
15773  }
15774  }
15775 
15776  if (alter_flags->is_set(HA_ALTER_TABLE_REORG) || alter_flags->is_set(HA_ADD_PARTITION))
15777  {
15778  if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15779  {
15780  new_tab->setFragmentCount(0);
15781  new_tab->setFragmentData(0, 0);
15782  }
15783  else if (alter_flags->is_set(HA_ADD_PARTITION))
15784  {
15785  partition_info *part_info= altered_table->part_info;
15786  new_tab->setFragmentCount(part_info->num_parts);
15787  }
15788 
15789  int res= dict->prepareHashMap(*old_tab, *new_tab);
15790  if (res == -1)
15791  {
15792  const NdbError err= dict->getNdbError();
15793  my_errno= ndb_to_mysql_error(&err);
15794  goto abort;
15795  }
15796  }
15797 
15798  DBUG_RETURN(0);
15799 abort:
15800  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
15801  == -1)
15802  {
15803  DBUG_PRINT("info", ("Failed to abort schema transaction"));
15804  ERR_PRINT(dict->getNdbError());
15805  error= ndb_to_mysql_error(&dict->getNdbError());
15806  }
15807 err:
15808  set_ndb_share_state(m_share, NSS_INITIAL);
15809  /* ndb_share reference schema free */
15810  DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u",
15811  m_share->key, m_share->use_count));
15812  free_share(&m_share); // Decrease ref_count
15813  delete alter_data;
15814  DBUG_RETURN(error);
15815 }
15816 
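alter_table_phase1() brackets all dictionary changes in an NDB schema transaction so that any failure rolls the whole alter back as a unit. A minimal sketch of the same begin/commit/abort pattern against the public NdbApi (assumes the NdbApi headers and an initialized dictionary; error handling reduced to returning the NDB error code):

    #include <NdbApi.hpp>

    // Sketch: run one dictionary change inside a schema transaction, aborting
    // on failure, mirroring the structure of alter_table_phase1/phase2.
    int run_schema_change(NdbDictionary::Dictionary* dict,
                          const NdbDictionary::Table& old_tab,
                          NdbDictionary::Table& new_tab)
    {
      if (dict->beginSchemaTrans() == -1)
        return dict->getNdbError().code;      // could not start the transaction

      if (dict->alterTableGlobal(old_tab, new_tab) != 0)
      {
        // Roll back everything done under the transaction.
        (void)dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort);
        return dict->getNdbError().code;
      }
      // Commit: the change becomes visible cluster-wide.
      if (dict->endSchemaTrans() == -1)
        return dict->getNdbError().code;
      return 0;
    }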
15817 int ha_ndbcluster::alter_frm(THD *thd, const char *file,
15818  NDB_ALTER_DATA *alter_data)
15819 {
15820  uchar *data= NULL, *pack_data= NULL;
15821  size_t length, pack_length;
15822  int error= 0;
15823 
15824  DBUG_ENTER("alter_frm");
15825 
15826  DBUG_PRINT("enter", ("file: %s", file));
15827 
15828  NDBDICT *dict= alter_data->dictionary;
15829 
15830  // TODO handle this
15831  DBUG_ASSERT(m_table != 0);
15832 
15833  DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED);
15834  if (readfrm(file, &data, &length) ||
15835  packfrm(data, length, &pack_data, &pack_length))
15836  {
15837  DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
15838  my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
15839  my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
15840  error= 1;
15841  my_error(ER_FILE_NOT_FOUND, MYF(0), file);
15842  }
15843  else
15844  {
15845  DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb",
15846  m_tabname));
15847  const NDBTAB *old_tab= alter_data->old_table;
15848  NdbDictionary::Table *new_tab= alter_data->new_table;
15849 
15850  new_tab->setFrm(pack_data, (Uint32)pack_length);
15851  if (dict->alterTableGlobal(*old_tab, *new_tab))
15852  {
15853  DBUG_PRINT("info", ("On-line alter of table %s failed", m_tabname));
15854  error= ndb_to_mysql_error(&dict->getNdbError());
15855  my_error(error, MYF(0));
15856  }
15857  my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
15858  my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
15859  }
15860 
15861  /* ndb_share reference schema(?) free */
15862  DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) free use_count: %u",
15863  m_share->key, m_share->use_count));
15864 
15865  DBUG_RETURN(error);
15866 }
15867 
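alter_frm() follows a read-copy-alter idiom: fetch the current dictionary table, take a mutable copy, attach the packed .frm image, and push the new definition. A sketch of that idiom (frm_data/frm_len stand in for the output of packfrm(); names are illustrative):

    #include <NdbApi.hpp>

    // Sketch: replace the opaque .frm blob stored with an NDB table.
    int replace_frm(NdbDictionary::Dictionary* dict, const char* name,
                    const void* frm_data, Uint32 frm_len)
    {
      const NdbDictionary::Table* old_tab= dict->getTable(name);
      if (old_tab == NULL)
        return dict->getNdbError().code;

      NdbDictionary::Table new_tab(*old_tab);  // mutable copy
      new_tab.setFrm(frm_data, frm_len);       // opaque to the data nodes
      if (dict->alterTableGlobal(*old_tab, new_tab) != 0)
        return dict->getNdbError().code;
      return 0;
    }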
15868 int ha_ndbcluster::alter_table_phase2(THD *thd,
15869  TABLE *altered_table,
15870  HA_CREATE_INFO *create_info,
15871  HA_ALTER_INFO *alter_info,
15872  HA_ALTER_FLAGS *alter_flags)
15873 
15874 {
15875  int error= 0;
15876  Thd_ndb *thd_ndb= get_thd_ndb(thd);
15877  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) alter_info->data;
15878  NDBDICT *dict= alter_data->dictionary;
15879  HA_ALTER_FLAGS dropping;
15880 
15881  DBUG_ENTER("alter_table_phase2");
15882  dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;
15883 
15884  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase2"))
15885  {
15886  error= HA_ERR_NO_CONNECTION;
15887  goto err;
15888  }
15889 
15890  if ((*alter_flags & dropping).is_set())
15891  {
15892  /* Tell the handler to finally drop the indexes. */
15893  if ((error= final_drop_index(table)))
15894  {
15895  print_error(error, MYF(0));
15896  goto abort;
15897  }
15898  }
15899 
15900  DBUG_PRINT("info", ("getting frm file %s", altered_table->s->path.str));
15901 
15902  DBUG_ASSERT(alter_data);
15903  error= alter_frm(thd, altered_table->s->path.str, alter_data);
15904  if (!error)
15905  {
15906  /*
15907  * Alter successful, commit schema transaction
15908  */
15909  if (dict->endSchemaTrans() == -1)
15910  {
15911  error= ndb_to_mysql_error(&dict->getNdbError());
15912  DBUG_PRINT("info", ("Failed to commit schema transaction, error %u",
15913  error));
15914  table->file->print_error(error, MYF(0));
15915  goto err;
15916  }
15917  if ((*alter_flags & HA_CHANGE_AUTOINCREMENT_VALUE).is_set())
15918  error= set_auto_inc_val(thd, create_info->auto_increment_value);
15919  if (error)
15920  {
15921  DBUG_PRINT("info", ("Failed to set auto_increment value"));
15922  goto err;
15923  }
15924  }
15925  else // if (error)
15926  {
15927 abort:
15928  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
15929  == -1)
15930  {
15931  DBUG_PRINT("info", ("Failed to abort schema transaction"));
15932  ERR_PRINT(dict->getNdbError());
15933  }
15934 err:
15935  /* ndb_share reference schema free */
15936  DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u",
15937  m_share->key, m_share->use_count));
15938  delete alter_data;
15939  alter_info->data= 0;
15940  }
15941  set_ndb_share_state(m_share, NSS_INITIAL);
15942  free_share(&m_share); // Decrease ref_count
15943  DBUG_RETURN(error);
15944 }
15945 
15946 int ha_ndbcluster::alter_table_phase3(THD *thd, TABLE *table,
15947  HA_CREATE_INFO *create_info,
15948  HA_ALTER_INFO *alter_info,
15949  HA_ALTER_FLAGS *alter_flags)
15950 {
15951  Thd_ndb *thd_ndb= get_thd_ndb(thd);
15952  DBUG_ENTER("alter_table_phase3");
15953 
15954  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) alter_info->data;
15955  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase3"))
15956  {
15957  delete alter_data;
15958  alter_info->data= 0;
15959  DBUG_RETURN(HA_ERR_NO_CONNECTION);
15960  }
15961 
15962  const char *db= table->s->db.str;
15963  const char *name= table->s->table_name.str;
15964 
15965  /*
15966  all mysqld's will read frms from disk and set up a new
15967  event operation for the table (new_op)
15968  */
15969  uint32 table_id= 0, table_version= 0;
15970  DBUG_ASSERT(alter_data != 0);
15971  if (alter_data)
15972  {
15973  table_id= alter_data->table_id;
15974  table_version= alter_data->old_table_version;
15975  }
15976  ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
15977  db, name,
15978  table_id, table_version,
15979  SOT_ONLINE_ALTER_TABLE_PREPARE,
15980  NULL, NULL);
15981 
15982  /*
15983  Get table id/version for new table
15984  */
15985  table_id= 0;
15986  table_version= 0;
15987  {
15988  Ndb* ndb= get_ndb(thd);
15989  DBUG_ASSERT(ndb != 0);
15990  if (ndb)
15991  {
15992  ndb->setDatabaseName(db);
15993  Ndb_table_guard ndbtab(ndb->getDictionary(), name);
15994  const NDBTAB *new_tab= ndbtab.get_table();
15995  DBUG_ASSERT(new_tab != 0);
15996  if (new_tab)
15997  {
15998  table_id= new_tab->getObjectId();
15999  table_version= new_tab->getObjectVersion();
16000  }
16001  }
16002  }
16003 
16004  /*
16005  all mysqld's will switch to using the new_op, and delete the old
16006  event operation
16007  */
16008  ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
16009  db, name,
16010  table_id, table_version,
16011  SOT_ONLINE_ALTER_TABLE_COMMIT,
16012  NULL, NULL);
16013 
16014  delete alter_data;
16015  alter_info->data= 0;
16016  DBUG_RETURN(0);
16017 }
16018 #endif
16019 
16020 bool set_up_tablespace(st_alter_tablespace *alter_info,
16021  NdbDictionary::Tablespace *ndb_ts)
16022 {
16023  if (alter_info->extent_size >= (Uint64(1) << 32))
16024  {
16025  // TODO set correct error
16026  return TRUE;
16027  }
16028  ndb_ts->setName(alter_info->tablespace_name);
16029  ndb_ts->setExtentSize(Uint32(alter_info->extent_size));
16030  ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name);
16031  return FALSE;
16032 }
16033 
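set_up_tablespace() above and set_up_datafile() below translate the parsed DDL into NdbDictionary objects; creation itself happens in ndbcluster_alter_tablespace(). A sketch of the equivalent direct API calls (all names and sizes are example values):

    #include <NdbApi.hpp>

    // Sketch of: CREATE TABLESPACE ts1 ADD DATAFILE 'data1.dat'
    //            USE LOGFILE GROUP lg1 INITIAL_SIZE 64M ENGINE NDB;
    int create_tablespace_example(NdbDictionary::Dictionary* dict)
    {
      NdbDictionary::Tablespace ts;
      ts.setName("ts1");                       // example name
      ts.setExtentSize(1024 * 1024);           // may be rounded up by the kernel
      ts.setDefaultLogfileGroup("lg1");        // must already exist
      if (dict->createTablespace(ts) != 0)
        return dict->getNdbError().code;

      NdbDictionary::Datafile df;
      df.setPath("data1.dat");
      df.setSize(64 * 1024 * 1024);            // INITIAL_SIZE
      df.setTablespace("ts1");
      if (dict->createDatafile(df) != 0)
        return dict->getNdbError().code;
      return 0;
    }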
16034 bool set_up_datafile(st_alter_tablespace *alter_info,
16035  NdbDictionary::Datafile *ndb_df)
16036 {
16037  if (alter_info->max_size > 0)
16038  {
16039  my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
16040  return TRUE;
16041  }
16042  ndb_df->setPath(alter_info->data_file_name);
16043  ndb_df->setSize(alter_info->initial_size);
16044  ndb_df->setTablespace(alter_info->tablespace_name);
16045  return FALSE;
16046 }
16047 
16048 bool set_up_logfile_group(st_alter_tablespace *alter_info,
16049  NdbDictionary::LogfileGroup *ndb_lg)
16050 {
16051  if (alter_info->undo_buffer_size >= (Uint64(1) << 32))
16052  {
16053  // TODO set correct error
16054  return TRUE;
16055  }
16056 
16057  ndb_lg->setName(alter_info->logfile_group_name);
16058  ndb_lg->setUndoBufferSize(Uint32(alter_info->undo_buffer_size));
16059  return FALSE;
16060 }
16061 
16062 bool set_up_undofile(st_alter_tablespace *alter_info,
16063  NdbDictionary::Undofile *ndb_uf)
16064 {
16065  ndb_uf->setPath(alter_info->undo_file_name);
16066  ndb_uf->setSize(alter_info->initial_size);
16067  ndb_uf->setLogfileGroup(alter_info->logfile_group_name);
16068  return FALSE;
16069 }
16070 
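The logfile-group pair works the same way: set_up_logfile_group() and set_up_undofile() fill in the objects that back CREATE LOGFILE GROUP. A sketch with example values (note the 32-bit limit on the undo buffer size checked above):

    #include <NdbApi.hpp>

    // Sketch of: CREATE LOGFILE GROUP lg1 ADD UNDOFILE 'undo1.log'
    //            INITIAL_SIZE 128M UNDO_BUFFER_SIZE 8M ENGINE NDB;
    int create_logfile_group_example(NdbDictionary::Dictionary* dict)
    {
      NdbDictionary::LogfileGroup lg;
      lg.setName("lg1");                        // example name
      lg.setUndoBufferSize(8 * 1024 * 1024);    // must fit in 32 bits
      if (dict->createLogfileGroup(lg) != 0)
        return dict->getNdbError().code;

      NdbDictionary::Undofile uf;
      uf.setPath("undo1.log");
      uf.setSize(128 * 1024 * 1024);            // INITIAL_SIZE
      uf.setLogfileGroup("lg1");
      if (dict->createUndofile(uf) != 0)
        return dict->getNdbError().code;
      return 0;
    }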
16071 int ndbcluster_alter_tablespace(handlerton *hton,
16072  THD* thd, st_alter_tablespace *alter_info)
16073 {
16074  int is_tablespace= 0;
16075  NdbError err;
16076  NDBDICT *dict;
16077  int error;
16078  const char *errmsg;
16079  Ndb *ndb;
16080  DBUG_ENTER("ndbcluster_alter_tablespace");
16081  LINT_INIT(errmsg);
16082 
16083  ndb= check_ndb_in_thd(thd);
16084  if (ndb == NULL)
16085  {
16086  DBUG_RETURN(HA_ERR_NO_CONNECTION);
16087  }
16088  dict= ndb->getDictionary();
16089 
16090  uint32 table_id= 0, table_version= 0;
16091  switch (alter_info->ts_cmd_type){
16092  case (CREATE_TABLESPACE):
16093  {
16094  error= ER_CREATE_FILEGROUP_FAILED;
16095 
16096  NdbDictionary::Tablespace ndb_ts;
16097  NdbDictionary::Datafile ndb_df;
16098  NdbDictionary::ObjectId objid;
16099  if (set_up_tablespace(alter_info, &ndb_ts))
16100  {
16101  DBUG_RETURN(1);
16102  }
16103  if (set_up_datafile(alter_info, &ndb_df))
16104  {
16105  DBUG_RETURN(1);
16106  }
16107  errmsg= "TABLESPACE";
16108  if (dict->createTablespace(ndb_ts, &objid))
16109  {
16110  DBUG_PRINT("error", ("createTablespace returned %d", error));
16111  goto ndberror;
16112  }
16113  table_id = objid.getObjectId();
16114  table_version = objid.getObjectVersion();
16115  if (dict->getWarningFlags() &
16116  NdbDictionary::Dictionary::WarnExtentRoundUp)
16117  {
16118  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16119  dict->getWarningFlags(),
16120  "Extent size rounded up to kernel page size");
16121  }
16122  DBUG_PRINT("alter_info", ("Successfully created Tablespace"));
16123  errmsg= "DATAFILE";
16124  if (dict->createDatafile(ndb_df))
16125  {
16126  err= dict->getNdbError();
16127  NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
16128  if (dict->getNdbError().code == 0 &&
16129  tmp.getObjectId() == objid.getObjectId() &&
16130  tmp.getObjectVersion() == objid.getObjectVersion())
16131  {
16132  dict->dropTablespace(tmp);
16133  }
16134 
16135  DBUG_PRINT("error", ("createDatafile returned %d", error));
16136  goto ndberror2;
16137  }
16138  if (dict->getWarningFlags() &
16139  NdbDictionary::Dictionary::WarnDatafileRoundUp)
16140  {
16141  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16142  dict->getWarningFlags(),
16143  "Datafile size rounded up to extent size");
16144  }
16145  else /* produce only 1 message */
16146  if (dict->getWarningFlags() &
16147  NdbDictionary::Dictionary::WarnDatafileRoundDown)
16148  {
16149  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16150  dict->getWarningFlags(),
16151  "Datafile size rounded down to extent size");
16152  }
16153  is_tablespace= 1;
16154  break;
16155  }
16156  case (ALTER_TABLESPACE):
16157  {
16158  error= ER_ALTER_FILEGROUP_FAILED;
16159  if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
16160  {
16161  NdbDictionary::Datafile ndb_df;
16162  if (set_up_datafile(alter_info, &ndb_df))
16163  {
16164  DBUG_RETURN(1);
16165  }
16166  errmsg= " CREATE DATAFILE";
16167  NdbDictionary::ObjectId objid;
16168  if (dict->createDatafile(ndb_df, false, &objid))
16169  {
16170  goto ndberror;
16171  }
16172  table_id= objid.getObjectId();
16173  table_version= objid.getObjectVersion();
16174  if (dict->getWarningFlags() &
16175  NdbDictionary::Dictionary::WarnDatafileRoundUp)
16176  {
16177  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16178  dict->getWarningFlags(),
16179  "Datafile size rounded up to extent size");
16180  }
16181  else /* produce only 1 message */
16182  if (dict->getWarningFlags() &
16183  NdbDictionary::Dictionary::WarnDatafileRoundDown)
16184  {
16185  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16186  dict->getWarningFlags(),
16187  "Datafile size rounded down to extent size");
16188  }
16189  }
16190  else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
16191  {
16192  NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name);
16193  NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name);
16194  NdbDictionary::ObjectId objid;
16195  df.getTablespaceId(&objid);
16196  table_id = df.getObjectId();
16197  table_version = df.getObjectVersion();
16198  if (ts.getObjectId() == objid.getObjectId() &&
16199  strcmp(df.getPath(), alter_info->data_file_name) == 0)
16200  {
16201  errmsg= " DROP DATAFILE";
16202  if (dict->dropDatafile(df))
16203  {
16204  goto ndberror;
16205  }
16206  }
16207  else
16208  {
16209  DBUG_PRINT("error", ("No such datafile"));
16210  my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
16211  DBUG_RETURN(1);
16212  }
16213  }
16214  else
16215  {
16216  DBUG_PRINT("error", ("Unsupported alter tablespace: %d",
16217  alter_info->ts_alter_tablespace_type));
16218  DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16219  }
16220  is_tablespace= 1;
16221  break;
16222  }
16223  case (CREATE_LOGFILE_GROUP):
16224  {
16225  error= ER_CREATE_FILEGROUP_FAILED;
16226  NdbDictionary::LogfileGroup ndb_lg;
16227  NdbDictionary::Undofile ndb_uf;
16228  NdbDictionary::ObjectId objid;
16229  if (alter_info->undo_file_name == NULL)
16230  {
16231  /*
16232  REDO files in LOGFILE GROUP not supported yet
16233  */
16234  DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16235  }
16236  if (set_up_logfile_group(alter_info, &ndb_lg))
16237  {
16238  DBUG_RETURN(1);
16239  }
16240  errmsg= "LOGFILE GROUP";
16241  if (dict->createLogfileGroup(ndb_lg, &objid))
16242  {
16243  goto ndberror;
16244  }
16245  table_id = objid.getObjectId();
16246  table_version = objid.getObjectVersion();
16247  if (dict->getWarningFlags() &
16248  NdbDictionary::Dictionary::WarnUndobufferRoundUp)
16249  {
16250  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16251  dict->getWarningFlags(),
16252  "Undo buffer size rounded up to kernel page size");
16253  }
16254  DBUG_PRINT("alter_info", ("Successfully created Logfile Group"));
16255  if (set_up_undofile(alter_info, &ndb_uf))
16256  {
16257  DBUG_RETURN(1);
16258  }
16259  errmsg= "UNDOFILE";
16260  if (dict->createUndofile(ndb_uf))
16261  {
16262  err= dict->getNdbError();
16263  NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
16264  if (dict->getNdbError().code == 0 &&
16265  tmp.getObjectId() == objid.getObjectId() &&
16266  tmp.getObjectVersion() == objid.getObjectVersion())
16267  {
16268  dict->dropLogfileGroup(tmp);
16269  }
16270  goto ndberror2;
16271  }
16272  if (dict->getWarningFlags() &
16273  NdbDictionary::Dictionary::WarnUndofileRoundDown)
16274  {
16275  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16276  dict->getWarningFlags(),
16277  "Undofile size rounded down to kernel page size");
16278  }
16279  break;
16280  }
16281  case (ALTER_LOGFILE_GROUP):
16282  {
16283  error= ER_ALTER_FILEGROUP_FAILED;
16284  if (alter_info->undo_file_name == NULL)
16285  {
16286  /*
16287  REDO files in LOGFILE GROUP not supported yet
16288  */
16289  DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16290  }
16291  NdbDictionary::Undofile ndb_uf;
16292  if (set_up_undofile(alter_info, &ndb_uf))
16293  {
16294  DBUG_RETURN(1);
16295  }
16296  errmsg= "CREATE UNDOFILE";
16297  NdbDictionary::ObjectId objid;
16298  if (dict->createUndofile(ndb_uf, false, &objid))
16299  {
16300  goto ndberror;
16301  }
16302  table_id = objid.getObjectId();
16303  table_version = objid.getObjectVersion();
16304  if (dict->getWarningFlags() &
16305  NdbDictionary::Dictionary::WarnUndofileRoundDown)
16306  {
16307  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16308  dict->getWarningFlags(),
16309  "Undofile size rounded down to kernel page size");
16310  }
16311  break;
16312  }
16313  case (DROP_TABLESPACE):
16314  {
16315  error= ER_DROP_FILEGROUP_FAILED;
16316  errmsg= "TABLESPACE";
16317  NdbDictionary::Tablespace ts=
16318  dict->getTablespace(alter_info->tablespace_name);
16319  table_id= ts.getObjectId();
16320  table_version= ts.getObjectVersion();
16321  if (dict->dropTablespace(ts))
16322  {
16323  goto ndberror;
16324  }
16325  is_tablespace= 1;
16326  break;
16327  }
16328  case (DROP_LOGFILE_GROUP):
16329  {
16330  error= ER_DROP_FILEGROUP_FAILED;
16331  errmsg= "LOGFILE GROUP";
16332  NdbDictionary::LogfileGroup lg=
16333  dict->getLogfileGroup(alter_info->logfile_group_name);
16334  table_id= lg.getObjectId();
16335  table_version= lg.getObjectVersion();
16336  if (dict->dropLogfileGroup(lg))
16337  {
16338  goto ndberror;
16339  }
16340  break;
16341  }
16342  case (CHANGE_FILE_TABLESPACE):
16343  {
16344  DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16345  }
16346  case (ALTER_ACCESS_MODE_TABLESPACE):
16347  {
16348  DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16349  }
16350  default:
16351  {
16352  DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16353  }
16354  }
16355  if (is_tablespace)
16356  ndbcluster_log_schema_op(thd,
16357  thd->query(), thd->query_length(),
16358  "", alter_info->tablespace_name,
16359  table_id, table_version,
16360  SOT_TABLESPACE, NULL, NULL);
16361  else
16362  ndbcluster_log_schema_op(thd,
16363  thd->query(), thd->query_length(),
16364  "", alter_info->logfile_group_name,
16365  table_id, table_version,
16366  SOT_LOGFILE_GROUP, NULL, NULL);
16367  DBUG_RETURN(FALSE);
16368 
16369 ndberror:
16370  err= dict->getNdbError();
16371 ndberror2:
16372  ndb_to_mysql_error(&err);
16373 
16374  my_error(error, MYF(0), errmsg);
16375  DBUG_RETURN(1);
16376 }
16377 
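For orientation, each ts_cmd_type branch in ndbcluster_alter_tablespace() is reached from one family of DDL statements. An illustrative mapping (statement texts are abbreviated examples only):

    // Illustrative mapping of st_alter_tablespace::ts_cmd_type values to the
    // DDL that drives ndbcluster_alter_tablespace(). Example statements only.
    static const char* const ts_cmd_examples[][2]= {
      { "CREATE_TABLESPACE",    "CREATE TABLESPACE ts1 ADD DATAFILE 'd1.dat' ..." },
      { "ALTER_TABLESPACE",     "ALTER TABLESPACE ts1 ADD|DROP DATAFILE 'd2.dat' ..." },
      { "CREATE_LOGFILE_GROUP", "CREATE LOGFILE GROUP lg1 ADD UNDOFILE 'u1.log' ..." },
      { "ALTER_LOGFILE_GROUP",  "ALTER LOGFILE GROUP lg1 ADD UNDOFILE 'u2.log' ..." },
      { "DROP_TABLESPACE",      "DROP TABLESPACE ts1 ENGINE NDB" },
      { "DROP_LOGFILE_GROUP",   "DROP LOGFILE GROUP lg1 ENGINE NDB" },
    };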
16378 
16379 bool ha_ndbcluster::get_no_parts(const char *name, uint *no_parts)
16380 {
16381  THD *thd= current_thd;
16382  Ndb *ndb;
16383  NDBDICT *dict;
16384  int err;
16385  DBUG_ENTER("ha_ndbcluster::get_no_parts");
16386  LINT_INIT(err);
16387 
16388  set_dbname(name);
16389  set_tabname(name);
16390  for (;;)
16391  {
16392  if (check_ndb_connection(thd))
16393  {
16394  err= HA_ERR_NO_CONNECTION;
16395  break;
16396  }
16397  ndb= get_ndb(thd);
16398  ndb->setDatabaseName(m_dbname);
16399  Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
16400  if (!ndbtab_g.get_table())
16401  ERR_BREAK(dict->getNdbError(), err);
16402  *no_parts= ndbtab_g.get_table()->getFragmentCount();
16403  DBUG_RETURN(FALSE);
16404  }
16405 
16406  print_error(err, MYF(0));
16407  DBUG_RETURN(TRUE);
16408 }
16409 
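get_no_parts() equates the MySQL partition count with the table's NDB fragment count. A sketch of reading it directly from the dictionary (database and table names are placeholders):

    #include <NdbApi.hpp>

    // Sketch: read the fragment (i.e. partition) count of an NDB table.
    int get_fragment_count(Ndb* ndb, const char* db, const char* tab_name,
                           unsigned* no_parts)
    {
      ndb->setDatabaseName(db);
      NdbDictionary::Dictionary* dict= ndb->getDictionary();
      const NdbDictionary::Table* tab= dict->getTable(tab_name);
      if (tab == NULL)
        return dict->getNdbError().code;
      *no_parts= tab->getFragmentCount();
      return 0;
    }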
16410 static int ndbcluster_fill_files_table(handlerton *hton,
16411  THD *thd,
16412  TABLE_LIST *tables,
16413  Item *cond)
16414 {
16415  TABLE* table= tables->table;
16416  Ndb *ndb= check_ndb_in_thd(thd);
16417  NDBDICT *dict= ndb->getDictionary();
16418  NdbDictionary::Dictionary::List dflist;
16419  NdbError ndberr;
16420  uint i;
16421  DBUG_ENTER("ndbcluster_fill_files_table");
16422 
16423  dict->listObjects(dflist, NdbDictionary::Object::Datafile);
16424  ndberr= dict->getNdbError();
16425  if (ndberr.classification != NdbError::NoError)
16426  ERR_RETURN(ndberr);
16427 
16428  for (i= 0; i < dflist.count; i++)
16429  {
16430  NdbDictionary::Dictionary::List::Element& elt= dflist.elements[i];
16431  Ndb_cluster_connection_node_iter iter;
16432  uint id;
16433 
16434  g_ndb_cluster_connection->init_get_next_node(iter);
16435 
16436  while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
16437  {
16438  init_fill_schema_files_row(table);
16439  NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
16440  ndberr= dict->getNdbError();
16441  if(ndberr.classification != NdbError::NoError)
16442  {
16443  if (ndberr.classification == NdbError::SchemaError)
16444  continue;
16445 
16446  if (ndberr.classification == NdbError::UnknownResultError)
16447  continue;
16448 
16449  ERR_RETURN(ndberr);
16450  }
16451  NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
16452  ndberr= dict->getNdbError();
16453  if (ndberr.classification != NdbError::NoError)
16454  {
16455  if (ndberr.classification == NdbError::SchemaError)
16456  continue;
16457  ERR_RETURN(ndberr);
16458  }
16459 
16460  table->field[IS_FILES_FILE_NAME]->set_notnull();
16461  table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
16462  system_charset_info);
16463  table->field[IS_FILES_FILE_TYPE]->set_notnull();
16464  table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8,
16465  system_charset_info);
16466  table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
16467  table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(),
16468  strlen(df.getTablespace()),
16469  system_charset_info);
16470  table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16471  table->field[IS_FILES_LOGFILE_GROUP_NAME]->
16472  store(ts.getDefaultLogfileGroup(),
16473  strlen(ts.getDefaultLogfileGroup()),
16474  system_charset_info);
16475  table->field[IS_FILES_ENGINE]->set_notnull();
16476  table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16477  ndbcluster_hton_name_length,
16478  system_charset_info);
16479 
16480  table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
16481  table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree()
16482  / ts.getExtentSize(), true);
16483  table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
16484  table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize()
16485  / ts.getExtentSize(), true);
16486  table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16487  table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
16488  table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
16489  table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize(), true);
16490  table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
16491  table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize(), true);
16492  table->field[IS_FILES_VERSION]->set_notnull();
16493  table->field[IS_FILES_VERSION]->store(df.getObjectVersion(), true);
16494 
16495  table->field[IS_FILES_ROW_FORMAT]->set_notnull();
16496  table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info);
16497 
16498  char extra[30];
16499  int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
16500  table->field[IS_FILES_EXTRA]->set_notnull();
16501  table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
16502  schema_table_store_record(thd, table);
16503  }
16504  }
16505 
16506  NdbDictionary::Dictionary::List tslist;
16507  dict->listObjects(tslist, NdbDictionary::Object::Tablespace);
16508  ndberr= dict->getNdbError();
16509  if (ndberr.classification != NdbError::NoError)
16510  ERR_RETURN(ndberr);
16511 
16512  for (i= 0; i < tslist.count; i++)
16513  {
16514  NdbDictionary::Dictionary::List::Element& elt= tslist.elements[i];
16515 
16516  NdbDictionary::Tablespace ts= dict->getTablespace(elt.name);
16517  ndberr= dict->getNdbError();
16518  if (ndberr.classification != NdbError::NoError)
16519  {
16520  if (ndberr.classification == NdbError::SchemaError)
16521  continue;
16522  ERR_RETURN(ndberr);
16523  }
16524 
16525  init_fill_schema_files_row(table);
16526  table->field[IS_FILES_FILE_TYPE]->set_notnull();
16527  table->field[IS_FILES_FILE_TYPE]->store("TABLESPACE", 10,
16528  system_charset_info);
16529 
16530  table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
16531  table->field[IS_FILES_TABLESPACE_NAME]->store(elt.name,
16532  strlen(elt.name),
16533  system_charset_info);
16534  table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16535  table->field[IS_FILES_LOGFILE_GROUP_NAME]->
16536  store(ts.getDefaultLogfileGroup(),
16537  strlen(ts.getDefaultLogfileGroup()),
16538  system_charset_info);
16539 
16540  table->field[IS_FILES_ENGINE]->set_notnull();
16541  table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16542  ndbcluster_hton_name_length,
16543  system_charset_info);
16544 
16545  table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16546  table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
16547 
16548  table->field[IS_FILES_VERSION]->set_notnull();
16549  table->field[IS_FILES_VERSION]->store(ts.getObjectVersion(), true);
16550 
16551  schema_table_store_record(thd, table);
16552  }
16553 
16554  NdbDictionary::Dictionary::List uflist;
16555  dict->listObjects(uflist, NdbDictionary::Object::Undofile);
16556  ndberr= dict->getNdbError();
16557  if (ndberr.classification != NdbError::NoError)
16558  ERR_RETURN(ndberr);
16559 
16560  for (i= 0; i < uflist.count; i++)
16561  {
16562  NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
16563  Ndb_cluster_connection_node_iter iter;
16564  unsigned id;
16565 
16566  g_ndb_cluster_connection->init_get_next_node(iter);
16567 
16568  while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
16569  {
16570  NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
16571  ndberr= dict->getNdbError();
16572  if (ndberr.classification != NdbError::NoError)
16573  {
16574  if (ndberr.classification == NdbError::SchemaError)
16575  continue;
16576  if (ndberr.classification == NdbError::UnknownResultError)
16577  continue;
16578  ERR_RETURN(ndberr);
16579  }
16580  NdbDictionary::LogfileGroup lfg=
16581  dict->getLogfileGroup(uf.getLogfileGroup());
16582  ndberr= dict->getNdbError();
16583  if (ndberr.classification != NdbError::NoError)
16584  {
16585  if (ndberr.classification == NdbError::SchemaError)
16586  continue;
16587  ERR_RETURN(ndberr);
16588  }
16589 
16590  init_fill_schema_files_row(table);
16591  table->field[IS_FILES_FILE_NAME]->set_notnull();
16592  table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
16593  system_charset_info);
16594  table->field[IS_FILES_FILE_TYPE]->set_notnull();
16595  table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
16596  system_charset_info);
16597  NdbDictionary::ObjectId objid;
16598  uf.getLogfileGroupId(&objid);
16599  table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16600  table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(),
16601  strlen(uf.getLogfileGroup()),
16602  system_charset_info);
16603  table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
16604  table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId(), true);
16605  table->field[IS_FILES_ENGINE]->set_notnull();
16606  table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16607  ndbcluster_hton_name_length,
16608  system_charset_info);
16609 
16610  table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
16611  table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4, true);
16612  table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16613  table->field[IS_FILES_EXTENT_SIZE]->store(4, true);
16614 
16615  table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
16616  table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize(), true);
16617  table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
16618  table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize(), true);
16619 
16620  table->field[IS_FILES_VERSION]->set_notnull();
16621  table->field[IS_FILES_VERSION]->store(uf.getObjectVersion(), true);
16622 
16623  char extra[100];
16624  int len= my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",
16625  id, (ulong) lfg.getUndoBufferSize());
16626  table->field[IS_FILES_EXTRA]->set_notnull();
16627  table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
16628  schema_table_store_record(thd, table);
16629  }
16630  }
16631 
16632  // now for LFGs
16633  NdbDictionary::Dictionary::List lfglist;
16634  dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
16635  ndberr= dict->getNdbError();
16636  if (ndberr.classification != NdbError::NoError)
16637  ERR_RETURN(ndberr);
16638 
16639  for (i= 0; i < lfglist.count; i++)
16640  {
16641  NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];
16642 
16643  NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
16644  ndberr= dict->getNdbError();
16645  if (ndberr.classification != NdbError::NoError)
16646  {
16647  if (ndberr.classification == NdbError::SchemaError)
16648  continue;
16649  ERR_RETURN(ndberr);
16650  }
16651 
16652  init_fill_schema_files_row(table);
16653  table->field[IS_FILES_FILE_TYPE]->set_notnull();
16654  table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
16655  system_charset_info);
16656 
16657  table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16658  table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name,
16659  strlen(elt.name),
16660  system_charset_info);
16661  table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
16662  table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId(), true);
16663  table->field[IS_FILES_ENGINE]->set_notnull();
16664  table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16665  ndbcluster_hton_name_length,
16666  system_charset_info);
16667 
16668  table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
16669  table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords(), true);
16670  table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16671  table->field[IS_FILES_EXTENT_SIZE]->store(4, true);
16672 
16673  table->field[IS_FILES_VERSION]->set_notnull();
16674  table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion(), true);
16675 
16676  char extra[100];
16677  int len= my_snprintf(extra,sizeof(extra),
16678  "UNDO_BUFFER_SIZE=%lu",
16679  (ulong) lfg.getUndoBufferSize());
16680  table->field[IS_FILES_EXTRA]->set_notnull();
16681  table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
16682  schema_table_store_record(thd, table);
16683  }
16684  DBUG_RETURN(0);
16685 }
16686 
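The fill function above repeats one enumeration idiom for datafiles, tablespaces, undofiles and logfile groups: listObjects(), then a per-element dictionary lookup that tolerates objects disappearing mid-scan. The idiom, stripped of the INFORMATION_SCHEMA row plumbing, as a minimal sketch:

    #include <NdbApi.hpp>
    #include <cstdio>

    // Sketch: enumerate all datafiles known to the NDB dictionary.
    int list_datafiles(NdbDictionary::Dictionary* dict)
    {
      NdbDictionary::Dictionary::List list;
      if (dict->listObjects(list, NdbDictionary::Object::Datafile) != 0)
        return dict->getNdbError().code;

      for (unsigned i= 0; i < list.count; i++)
      {
        const NdbDictionary::Dictionary::List::Element& elt= list.elements[i];
        NdbDictionary::Datafile df= dict->getDatafile(0, elt.name);
        if (dict->getNdbError().classification != NdbError::NoError)
          continue;                        // e.g. dropped since the listing
        printf("%s in tablespace %s, size %llu\n",
               elt.name, df.getTablespace(),
               (unsigned long long)df.getSize());
      }
      return 0;
    }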
16687 static int show_ndb_vars(THD *thd, SHOW_VAR *var, char *buff)
16688 {
16689  if (!check_ndb_in_thd(thd))
16690  return -1;
16691  struct st_ndb_status *st;
16692  SHOW_VAR *st_var;
16693  {
16694  char *mem= (char*)sql_alloc(sizeof(struct st_ndb_status) +
16695  sizeof(ndb_status_variables_dynamic));
16696  st= new (mem) st_ndb_status;
16697  st_var= (SHOW_VAR*)(mem + sizeof(struct st_ndb_status));
16698  memcpy(st_var, &ndb_status_variables_dynamic, sizeof(ndb_status_variables_dynamic));
16699  int i= 0;
16700  SHOW_VAR *tmp= &(ndb_status_variables_dynamic[0]);
16701  for (; tmp->value; tmp++, i++)
16702  st_var[i].value= mem + (tmp->value - (char*)&g_ndb_status);
16703  }
16704  {
16705  Thd_ndb *thd_ndb= get_thd_ndb(thd);
16706  Ndb_cluster_connection *c= thd_ndb->connection;
16707  update_status_variables(thd_ndb, st, c);
16708  }
16709  var->type= SHOW_ARRAY;
16710  var->value= (char *) st_var;
16711  return 0;
16712 }
16713 
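show_ndb_vars() uses the SHOW_FUNC hook: the function runs at SHOW STATUS time and fills in the SHOW_VAR it is handed, here pointing it at a freshly built SHOW_ARRAY. A minimal sketch of the same hook for a single scalar, within a plugin file like this one (names are illustrative):

    // Sketch of a SHOW_FUNC status hook: invoked on every SHOW STATUS, it
    // stores the value in the caller-supplied buffer and describes its type.
    static int show_my_counter(THD* thd, SHOW_VAR* var, char* buff)
    {
      var->type= SHOW_LONG;          // a scalar long
      var->value= buff;              // value lives in the supplied buffer
      *((long*) buff)= 42;           // illustrative; real code reads live state
      return 0;
    }

    static SHOW_VAR my_status_vars[]= {
      {"My_counter", (char*) &show_my_counter, SHOW_FUNC},
      {NullS, NullS, SHOW_LONG}
    };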
16714 SHOW_VAR ndb_status_variables_export[]= {
16715  {"Ndb", (char*) &show_ndb_vars, SHOW_FUNC},
16716  {"Ndb_conflict", (char*) &ndb_status_conflict_variables, SHOW_ARRAY},
16717  {"Ndb", (char*) &ndb_status_injector_variables, SHOW_ARRAY},
16718  {"Ndb", (char*) &ndb_status_slave_variables, SHOW_ARRAY},
16719  {"Ndb", (char*) &show_ndb_server_api_stats, SHOW_FUNC},
16720  {"Ndb_index_stat", (char*) &ndb_status_index_stat_variables, SHOW_ARRAY},
16721  {NullS, NullS, SHOW_LONG}
16722 };
16723 
16724 static MYSQL_SYSVAR_ULONG(
16725  cache_check_time, /* name */
16726  opt_ndb_cache_check_time, /* var */
16727  PLUGIN_VAR_RQCMDARG,
16728  "A dedicated thread is created to, at the given "
16729  "millisecond interval, invalidate the query cache "
16730  "if another MySQL server in the cluster has changed "
16731  "the data in the database.",
16732  NULL, /* check func. */
16733  NULL, /* update func. */
16734  0, /* default */
16735  0, /* min */
16736  ONE_YEAR_IN_SECONDS, /* max */
16737  0 /* block */
16738 );
16739 
16740 
16741 static MYSQL_SYSVAR_ULONG(
16742  extra_logging, /* name */
16743  opt_ndb_extra_logging, /* var */
16744  PLUGIN_VAR_OPCMDARG,
16745  "Turn on more logging in the error log.",
16746  NULL, /* check func. */
16747  NULL, /* update func. */
16748  1, /* default */
16749  0, /* min */
16750  0, /* max */
16751  0 /* block */
16752 );
16753 
16754 
16755 static MYSQL_SYSVAR_ULONG(
16756  wait_connected, /* name */
16757  opt_ndb_wait_connected, /* var */
16758  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16759  "Time (in seconds) for mysqld to wait for connection "
16760  "to cluster management and data nodes.",
16761  NULL, /* check func. */
16762  NULL, /* update func. */
16763  0, /* default */
16764  0, /* min */
16765  ONE_YEAR_IN_SECONDS, /* max */
16766  0 /* block */
16767 );
16768 
16769 
16770 static MYSQL_SYSVAR_ULONG(
16771  wait_setup, /* name */
16772  opt_ndb_wait_setup, /* var */
16773  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16774  "Time (in seconds) for mysqld to wait for setup to "
16775  "complete (0 = no wait)",
16776  NULL, /* check func. */
16777  NULL, /* update func. */
16778  15, /* default */
16779  0, /* min */
16780  ONE_YEAR_IN_SECONDS, /* max */
16781  0 /* block */
16782 );
16783 
16784 
16785 static MYSQL_SYSVAR_UINT(
16786  cluster_connection_pool, /* name */
16787  opt_ndb_cluster_connection_pool, /* var */
16788  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16789  "Pool of cluster connections to be used by mysql server.",
16790  NULL, /* check func. */
16791  NULL, /* update func. */
16792  1, /* default */
16793  1, /* min */
16794  63, /* max */
16795  0 /* block */
16796 );
16797 
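Values declared with MYSQL_THDVAR_* (such as autoincrement_prefetch_sz near the top of this file) are per-session and read through the THDVAR macro, while MYSQL_SYSVAR_* binds the option directly to the named C variable. A sketch of both access styles from code in this file (the function itself is illustrative):

    // Sketch: reading plugin variables from handler code.
    static void example_variable_access(THD* thd)
    {
      // Per-session value declared with MYSQL_THDVAR_UINT.
      uint prefetch= THDVAR(thd, autoincrement_prefetch_sz);

      // Global value: MYSQL_SYSVAR_ULONG binds --ndb-wait-connected to this var.
      ulong wait= opt_ndb_wait_connected;

      (void) prefetch;
      (void) wait;
    }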
16798 /* should be in index_stat.h */
16799 
16800 extern int
16801 ndb_index_stat_option_check(MYSQL_THD,
16802  struct st_mysql_sys_var *var,
16803  void *save,
16804  struct st_mysql_value *value);
16805 extern void
16806 ndb_index_stat_option_update(MYSQL_THD,
16807  struct st_mysql_sys_var *var,
16808  void *var_ptr,
16809  const void *save);
16810 
16811 extern char ndb_index_stat_option_buf[];
16812 
16813 static MYSQL_SYSVAR_STR(
16814  index_stat_option, /* name */
16815  opt_ndb_index_stat_option, /* var */
16816  PLUGIN_VAR_RQCMDARG,
16817  "Comma-separated tunable options for ndb index statistics",
16818  ndb_index_stat_option_check, /* check func. */
16819  ndb_index_stat_option_update, /* update func. */
16820  ndb_index_stat_option_buf
16821 );
16822 
16823 
16824 ulong opt_ndb_report_thresh_binlog_epoch_slip;
16825 static MYSQL_SYSVAR_ULONG(
16826  report_thresh_binlog_epoch_slip, /* name */
16827  opt_ndb_report_thresh_binlog_epoch_slip,/* var */
16828  PLUGIN_VAR_RQCMDARG,
16829  "Threshold on number of epochs to be behind before reporting binlog "
16830  "status. E.g. 3 means that if the difference between what epoch has "
16831  "been received from the storage nodes and what has been applied to "
16832  "the binlog is 3 or more, a status message will be sent to the cluster "
16833  "log.",
16834  NULL, /* check func. */
16835  NULL, /* update func. */
16836  3, /* default */
16837  0, /* min */
16838  256, /* max */
16839  0 /* block */
16840 );
16841 
16842 
16843 ulong opt_ndb_report_thresh_binlog_mem_usage;
16844 static MYSQL_SYSVAR_ULONG(
16845  report_thresh_binlog_mem_usage, /* name */
16846  opt_ndb_report_thresh_binlog_mem_usage,/* var */
16847  PLUGIN_VAR_RQCMDARG,
16848  "Threshold on percentage of free memory before reporting binlog "
16849  "status. E.g. 10 means that if amount of available memory for "
16850  "receiving binlog data from the storage nodes goes below 10%, "
16851  "a status message will be sent to the cluster log.",
16852  NULL, /* check func. */
16853  NULL, /* update func. */
16854  10, /* default */
16855  0, /* min */
16856  100, /* max */
16857  0 /* block */
16858 );
16859 
16860 
16861 my_bool opt_ndb_log_update_as_write;
16862 static MYSQL_SYSVAR_BOOL(
16863  log_update_as_write, /* name */
16864  opt_ndb_log_update_as_write, /* var */
16865  PLUGIN_VAR_OPCMDARG,
16866  "For efficiency log only after image as a write event. "
16867  "Ignore before image. This may cause compatability problems if "
16868  "replicating to other storage engines than ndbcluster.",
16869  NULL, /* check func. */
16870  NULL, /* update func. */
16871  1 /* default */
16872 );
16873 
16874 
16875 my_bool opt_ndb_log_updated_only;
16876 static MYSQL_SYSVAR_BOOL(
16877  log_updated_only, /* name */
16878  opt_ndb_log_updated_only, /* var */
16879  PLUGIN_VAR_OPCMDARG,
16880  "For efficiency log only updated columns. Columns are considered "
16881  "as \"updated\" even if they are updated with the same value. "
16882  "This may cause compatability problems if "
16883  "replicating to other storage engines than ndbcluster.",
16884  NULL, /* check func. */
16885  NULL, /* update func. */
16886  1 /* default */
16887 );
16888 
16889 
16890 my_bool opt_ndb_log_orig;
16891 static MYSQL_SYSVAR_BOOL(
16892  log_orig, /* name */
16893  opt_ndb_log_orig, /* var */
16894  PLUGIN_VAR_OPCMDARG,
16895  "Log originating server id and epoch in ndb_binlog_index. Each epoch "
16896  "may in this case have multiple rows in ndb_binlog_index, one for "
16897  "each originating epoch.",
16898  NULL, /* check func. */
16899  NULL, /* update func. */
16900  0 /* default */
16901 );
16902 
16903 
16904 my_bool opt_ndb_log_bin;
16905 static MYSQL_SYSVAR_BOOL(
16906  log_bin, /* name */
16907  opt_ndb_log_bin, /* var */
16908  PLUGIN_VAR_OPCMDARG,
16909  "Log ndb tables in the binary log. Option only has meaning if "
16910  "the binary log has been turned on for the server.",
16911  NULL, /* check func. */
16912  NULL, /* update func. */
16913  1 /* default */
16914 );
16915 
16916 
16917 my_bool opt_ndb_log_binlog_index;
16918 static MYSQL_SYSVAR_BOOL(
16919  log_binlog_index, /* name */
16920  opt_ndb_log_binlog_index, /* var */
16921  PLUGIN_VAR_OPCMDARG,
16922  "Insert mapping between epochs and binlog positions into the "
16923  "ndb_binlog_index table.",
16924  NULL, /* check func. */
16925  NULL, /* update func. */
16926  1 /* default */
16927 );
16928 
16929 
16930 static my_bool opt_ndb_log_empty_epochs;
16931 static MYSQL_SYSVAR_BOOL(
16932  log_empty_epochs, /* name */
16933  opt_ndb_log_empty_epochs, /* var */
16934  PLUGIN_VAR_OPCMDARG,
16935  "",
16936  NULL, /* check func. */
16937  NULL, /* update func. */
16938  0 /* default */
16939 );
16940 
16941 bool ndb_log_empty_epochs(void)
16942 {
16943  return opt_ndb_log_empty_epochs;
16944 }
16945 
16946 my_bool opt_ndb_log_apply_status;
16947 static MYSQL_SYSVAR_BOOL(
16948  log_apply_status, /* name */
16949  opt_ndb_log_apply_status, /* var */
16950  PLUGIN_VAR_OPCMDARG,
16951  "Log ndb_apply_status updates from Master in the Binlog",
16952  NULL, /* check func. */
16953  NULL, /* update func. */
16954  0 /* default */
16955 );
16956 
16957 
16958 static MYSQL_SYSVAR_STR(
16959  connectstring, /* name */
16960  opt_ndb_connectstring, /* var */
16961  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16962  "Connect string for ndbcluster.",
16963  NULL, /* check func. */
16964  NULL, /* update func. */
16965  NULL /* default */
16966 );
16967 
16968 
16969 static MYSQL_SYSVAR_STR(
16970  mgmd_host, /* name */
16971  opt_ndb_connectstring, /* var */
16972  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16973  "Same as --ndb-connectstring",
16974  NULL, /* check func. */
16975  NULL, /* update func. */
16976  NULL /* default */
16977 );
16978 
16979 
16980 static MYSQL_SYSVAR_UINT(
16981  nodeid, /* name */
16982  opt_ndb_nodeid, /* var */
16983  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16984  "Set nodeid for this node. Overrides node id specified "
16985  "in --ndb-connectstring.",
16986  NULL, /* check func. */
16987  NULL, /* update func. */
16988  0, /* default */
16989  0, /* min */
16990  MAX_NODES_ID, /* max */
16991  0 /* block */
16992 );
16993 
16994 #ifndef DBUG_OFF
16995 
16996 static
16997 void
16998 dbug_check_shares(THD*, st_mysql_sys_var*, void*, const void*)
16999 {
17000  sql_print_information("dbug_check_shares");
17001  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
17002  {
17003  NDB_SHARE * share = (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
17004  sql_print_information(" %s.%s: state: %s(%u) use_count: %u",
17005  share->db, share->table_name,
17006  get_share_state_string(share->state),
17007  (unsigned)share->state,
17008  share->use_count);
17009  }
17010 
17011  /**
17012  * Only shares in mysql database may be open...
17013  */
17014  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
17015  {
17016  NDB_SHARE * share = (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
17017  DBUG_ASSERT(strcmp(share->db, "mysql") == 0);
17018  }
17019 }
17020 
17021 static MYSQL_THDVAR_UINT(
17022  check_shares, /* name */
17023  PLUGIN_VAR_RQCMDARG,
17024  "Debug, only...check that no shares are lingering...",
17025  NULL, /* check func */
17026  dbug_check_shares, /* update func */
17027  0, /* default */
17028  0, /* min */
17029  1, /* max */
17030  0 /* block */
17031 );
17032 
17033 #endif
17034 
17035 static struct st_mysql_sys_var* system_variables[]= {
17036  MYSQL_SYSVAR(cache_check_time),
17037  MYSQL_SYSVAR(extra_logging),
17038  MYSQL_SYSVAR(wait_connected),
17039  MYSQL_SYSVAR(wait_setup),
17040  MYSQL_SYSVAR(cluster_connection_pool),
17041  MYSQL_SYSVAR(report_thresh_binlog_mem_usage),
17042  MYSQL_SYSVAR(report_thresh_binlog_epoch_slip),
17043  MYSQL_SYSVAR(log_update_as_write),
17044  MYSQL_SYSVAR(log_updated_only),
17045  MYSQL_SYSVAR(log_orig),
17046  MYSQL_SYSVAR(distribution),
17047  MYSQL_SYSVAR(autoincrement_prefetch_sz),
17048  MYSQL_SYSVAR(force_send),
17049  MYSQL_SYSVAR(use_exact_count),
17050  MYSQL_SYSVAR(use_transactions),
17051  MYSQL_SYSVAR(use_copying_alter_table),
17052  MYSQL_SYSVAR(optimized_node_selection),
17053  MYSQL_SYSVAR(batch_size),
17054  MYSQL_SYSVAR(optimization_delay),
17055  MYSQL_SYSVAR(index_stat_enable),
17056  MYSQL_SYSVAR(index_stat_option),
17057  MYSQL_SYSVAR(index_stat_cache_entries),
17058  MYSQL_SYSVAR(index_stat_update_freq),
17059  MYSQL_SYSVAR(table_no_logging),
17060  MYSQL_SYSVAR(table_temporary),
17061  MYSQL_SYSVAR(log_bin),
17062  MYSQL_SYSVAR(log_binlog_index),
17063  MYSQL_SYSVAR(log_empty_epochs),
17064  MYSQL_SYSVAR(log_apply_status),
17065  MYSQL_SYSVAR(connectstring),
17066  MYSQL_SYSVAR(mgmd_host),
17067  MYSQL_SYSVAR(nodeid),
17068  MYSQL_SYSVAR(blob_read_batch_bytes),
17069  MYSQL_SYSVAR(blob_write_batch_bytes),
17070  MYSQL_SYSVAR(deferred_constraints),
17071  MYSQL_SYSVAR(join_pushdown),
17072 #ifndef DBUG_OFF
17073  MYSQL_SYSVAR(check_shares),
17074 #endif
17075  NULL
17076 };
17077 
17078 struct st_mysql_storage_engine ndbcluster_storage_engine=
17079 { MYSQL_HANDLERTON_INTERFACE_VERSION };
17080 
17081 
17082 #include "ha_ndbinfo.h"
17083 
17084 extern struct st_mysql_sys_var* ndbinfo_system_variables[];
17085 
17086 struct st_mysql_storage_engine ndbinfo_storage_engine=
17087 { MYSQL_HANDLERTON_INTERFACE_VERSION };
17088 
17089 mysql_declare_plugin(ndbcluster)
17090 {
17091  MYSQL_STORAGE_ENGINE_PLUGIN,
17092  &ndbcluster_storage_engine,
17093  ndbcluster_hton_name,
17094  "MySQL AB",
17095  "Clustered, fault-tolerant tables",
17096  PLUGIN_LICENSE_GPL,
17097  ndbcluster_init, /* plugin init */
17098  NULL, /* plugin deinit */
17099  0x0100, /* plugin version */
17100  ndb_status_variables_export,/* status variables */
17101  system_variables, /* system variables */
17102  NULL, /* config options */
17103  0 /* flags */
17104 },
17105 {
17106  MYSQL_STORAGE_ENGINE_PLUGIN,
17107  &ndbinfo_storage_engine,
17108  "ndbinfo",
17109  "Sun Microsystems Inc.",
17110  "MySQL Cluster system information storage engine",
17111  PLUGIN_LICENSE_GPL,
17112  ndbinfo_init, /* plugin init */
17113  ndbinfo_deinit, /* plugin deinit */
17114  0x0001, /* plugin version */
17115  NULL, /* status variables */
17116  ndbinfo_system_variables, /* system variables */
17117  NULL, /* config options */
17118  0 /* flags */
17119 }
17120 mysql_declare_plugin_end;
17121 
17122 #endif