/* $Id: gridjm.cpp,v 1.5 2007-11-08 15:43:40 aehyvari Exp $ */
/* Copyright 2003 Henrik Thostrup Jensen and Jesper Ryge Leth
 * All rights reserved.
 *
 * This file is part of NGProxy.
 *
 * NGProxy is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * NGProxy is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NGProxy; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/* start[copyright.txt] */
/* Including data from file copyright.txt */
/* file opened */
/* Reading 775 bytes of data */
/* Start of include */
/*
 * Copyright 2007 Antti Hyvärinen
 *
 * This file is part of GridJM.
 *
 * GridJM is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GridJM is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GridJM; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/* End of include */
/* stop[copyright.txt] */

#include <list>
#include <queue>
#include <iostream>
#include <arc/mdsdiscovery.h>
#include <arc/mdsquery.h>
#include <arc/standardbrokers.h>
#include "jobsubmission_badqueues.h"
#include <arc/jobftpcontrol.h>
#include <arc/joblist.h>
#include <arc/url.h>
#include <arc/target.h>
#include <arc/notify.h>
#include <arc/certificate.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
#include "gridjm.h"
#include "parseargs.h"

/*
 * Human-readable failure descriptions.  GridJM::HandleJobFailure indexes
 * this table with a failurestatus value to build the error log line.
 * NOTE(review): the entries presumably follow the declaration order of the
 * failurestatus enum (nzexit first, other last) -- confirm against gridjm.h
 * before adding or reordering entries.
 */
char *failureconv[] = {
    "Non-zero exit of job",
    "State stall in state INLRMS:Q",
    "State stall in state INLRMS:S",
    "State stall in state INLRMS:E",
    "State stall in state INLRMS:O",
    "State stall in state EXECUTED",
    "Failure in download",
    "Job disappeared or did not show after submission",
    "Some other failure"
};


/**
 * Set up a job manager around state handed in by the caller.
 *
 * The constructor only stores the given pointers and values and resets the
 * internal counters and timestamps; no grid activity is started here.
 *
 * @param i            instrumentation module
 * @param a            parsed program arguments
 * @param p_write      descriptor used to ask for more jobs
 * @param n_jobs       queue of new job descriptions
 * @param n_jobs_lock  mutex taken around accesses to n_jobs
 * @param f_downloads  list of forked downloads (pid & name)
 * @param f_dl_lock    mutex taken around accesses to f_downloads
 */
GridJM::GridJM(Instrumenter* i, struct arguments * a, int p_write,
                queue<char*> *n_jobs, pthread_mutex_t *n_jobs_lock,
                list<PidJob*> *f_downloads, pthread_mutex_t *f_dl_lock) {
    /* Collaborators and configuration */
    this->inst = i;
    this->args = a;
    this->pipe_write = p_write;

    /* Shared structures and the locks that guard them */
    this->new_jobs = n_jobs;
    this->new_jobs_lock = n_jobs_lock;
    this->forked_downloads = f_downloads;
    this->forked_dl_lock = f_dl_lock;

    /* Bookkeeping: nothing is in the grid yet and neither the cluster
     * list nor the queue list has ever been fetched */
    this->num_current_jobs = 0;
    this->clupd = 0;
    this->qupd = 0;
    this->available_cpus = 1;
}

/**
 * Initialize the grid middleware, log the proxy certificate details and
 * enter the main loop.
 *
 * The Globus module handling here is deliberate, even though it looks odd:
 * in particular the GlobusGSIGSSAPIModule object must be defined so that
 * the user name is announced properly to the resources in LDAP queries.
 * Without it an extra /CN=xxxx is appended to the name.
 *
 * @return whatever mainloop() returns.
 */

int GridJM::start() {
    // The return value of the activation is not checked
    globus_module_activate(GLOBUS_XIO_MODULE);
    // Never referenced again; it is defined only for the effect described
    // in the header comment above
    GlobusGSIGSSAPIModule gmod;
    SetNotifyLevel(INFO);
    // Log the subject and the expiry time of the proxy certificate
    Certificate proxy(PROXY);
    notify(INFO) << "proxy subject: " << proxy.GetSN() << endl;
    notify(INFO) << "proxy valid to: " << proxy.ExpiryTime() << endl;


    return mainloop();
}

int GridJM::mainloop() {

    int last_time = time(0);
    int time_left = 0;

    running = 1;

    do {
        /* Clean the forked downloads & report the finishing
         * Check if it is time to update the job statuses */
        if (time(0) - last_time > args->interval) {
            UpdateJobStatus();
            last_time = time(0);
        }

        /* Update the available clusters if list is old */
        if ((clusters.size() == 0) || (time(0) - clupd > args->clupdp)) {
            try {
                clusters = GetClusterResources();
                clupd = time(0);
            } catch (MDSDiscoveryError e) {
                notify(WARNING) << "Error: " << e.what() << endl;
            }
        }

        /* Update the available queues if list is old */
        if ((queuelist.size() == 0) || (time(0) - qupd > args->qupdp)) {
            try {
                queuelist = GetQueueInfo(clusters,
                        MDS_FILTER_CLUSTERINFO, true, "", 20);
                qupd = time(0);
            } catch (MDSDiscoveryError e) {
                notify(WARNING) << "Error: " << e.what() << endl;
            }
        }

        /* Check the new job queue */
        HandleNewJobs();

        time_left = sleep(1);
        if (time_left) {
            // If we get here, we where interrupted during sleep
            notify(INFO) << "The sleeper awakens" << endl;
        }
    } while (running);

    /* FIXME some sensible error reporting, please */
    return -1;
}

void GridJM::HandleNewJobs() {
    /**
     * We now check for the arrival of new jobs
     * This is done by acquiring lock, see if there is anything
     * in the new_jobs vector, we take one out, release the lock.
     * and then we submit the job.
     * If there are no new jobs and we get here, it means the queue
     * for sending jobs is empty, so we might want to ask more jobs.
     * Call the procedure...
     */
    bool new_job = false;    // Indicates whethere there are any new jobs
    Xrsl *xrsl;
    pthread_mutex_lock(new_jobs_lock);

    if (!new_jobs->empty()) {  // See if any new jobs arrived
        notify(INFO) << "New job(s) appeared in queue" << endl;
        // Take the first element in the queue and pop it
        try {
            xrsl = new Xrsl(new_jobs->front());
            new_job = true;
        }
        catch (XrslError e) {
            notify(INFO) << "Xrsl: " << e.what() << endl;
            new_job = false;
        }

        free(new_jobs->front());
        new_jobs->pop();
    }
    pthread_mutex_unlock(new_jobs_lock); // Release lock

    if (new_job) {
        try {
            inst->simstart(xrsl->GetRelation("jobname").GetSingleValue());
            inst->tofile();
        }
        catch (XrslError e) {
            notify(ERROR) << "Xrsl: " << e.what() << endl;
            free(xrsl);
            return;
        }

        URL *jobid = SubmitJob(xrsl);
        if (!jobid) {
            // Submission failed - do nothing
            // Hey? Are we leaking here sometimes?
            notify(ERROR) << "Job submission failed" << endl;
            free(xrsl);
            return;
        }
        else {
            notify(DEBUG) << jobid->str() << endl;
            // Job was submitted succesfully
            // Create jobstatus object and push onto current_jobs
            JobStatus* js = new JobStatus(xrsl, jobid);
            current_jobs[js->getName()] = js;
            // We need a counter telling how many jobs are
            // in current jobs
            num_current_jobs += 1;
            // Tell instrumentation
            notify(INFO) << js->getName() << " is " << jobid->str() << endl;
        }
    } else {
        GetMoreJobs();
    }

    return;
}

void GridJM::GetMoreJobs() {
    if (pipe_write != -1) {
        notify(INFO) << "Number of jobs in grid: " << num_current_jobs << endl;
        // Check that we have not sent too many and there are still cpus
        // available.  available_cpus is constant 1 if automatic is not
        // on.
        if (num_current_jobs < args->maxjobs && available_cpus > 0) {
//            notify(INFO) << "Asking for more jobs from the socket" << endl;
            notify(INFO) << "."; // More jobs
            fflush(NULL);
            char *output;
            int strsz = asprintf(&output,
                "# free %d\n", args->maxjobs - num_current_jobs);
            write(pipe_write, output, strsz);
        } else {
            notify(INFO) << "Grid is full" << endl;
        }
    }
    return;
}

/**
 * Submit the job described by xrsl to the grid.
 *
 * Steps:
 *  1. validate the xrsl,
 *  2. copy the known queue list and drop the queues of clusters and the
 *     individual queues that are currently marked bad,
 *  3. construct the targets, perform the standard brokering and submit,
 *  4. record the queues that the submission found bad, register the
 *     submission and store the job id.
 *
 * When args->automatic is set, available_cpus is recomputed as the sum of
 * free cpus (total - used) over the targets; otherwise it is set to 1.
 *
 * @param xrsl job description; it is not deleted here.
 * @return newly allocated URL of the submitted job (the caller owns it),
 *         or NULL if validation, target construction or submission failed
 *         or no clusters/queues are known.
 */
URL *GridJM::SubmitJob(Xrsl *xrsl) {

    // Check that the xrsl is ok
    try {
        PerformXrslValidation(*xrsl);
    } catch (const ARCLibError &e) {
        notify(INFO) << e.what() << endl;
        return NULL;
    }

    // Nowhere to submit to
    if (clusters.size() == 0)
        return NULL;

    if (queuelist.size() == 0)
        return NULL;

    /* t_queuelist is made from queuelist by removing current bad
     * clusters from it */
    list<Queue> t_queuelist = queuelist;

    // Remove bad clusters from queues
    cout << "Bad clusters: ";
    for (vector<BCEntry*>::iterator bci = badclusters.begin();
            bci != badclusters.end(); ++bci) {
        // Multiple queues in one cluster, remove all.
        // Queues are removed one at a time: every round erases the
        // first match and rescans, until a round finds nothing
        bool found = true; // At least one round
        while ((*bci)->valid() && found == true) {
            cout << (*bci)->cluster << "(" << (*bci)->valid() << ") ";

            found = false;
            for (list<Queue>::iterator qi = t_queuelist.begin();
                    qi != t_queuelist.end(); ++qi) {
                if (strcmp(qi->cluster.hostname.c_str(),
                                (*bci)->cluster.c_str()) == 0) {
                    t_queuelist.erase(qi);
                    found = true;
                    break;  // qi is invalid after the erase
                }
            }
        }
    }
    cout << "." << endl;

    // Remove the bad queues noticed by the previous submissions
    cout << "Bad queues: ";
    for (vector<BQEntry*>::iterator bqi = badqueues.begin();
            bqi != badqueues.end(); ++bqi) {
        bool found = true;
        while ((*bqi)->valid() && found == true) {
            cout << (*bqi)->queue << "@" << (*bqi)->cluster << " ";

            found = false;
            for (list<Queue>::iterator qi = t_queuelist.begin();
                    qi != t_queuelist.end(); ++qi) {
                if ((strcmp(qi->cluster.hostname.c_str(),
                                (*bqi)->cluster.c_str()) == 0) &&
                    (strcmp(qi->name.c_str(),
                            (*bqi)->queue.c_str()) == 0) ) {
                    t_queuelist.erase(qi);
                    found = true;
                    break;  // qi is invalid after the erase
                }
            }
        }
    }
    cout << "." << endl;

    std::list<Target> targetlist;
    try {
        targetlist = ConstructTargets(t_queuelist, *xrsl);
    } catch (const TargetError &e) {
        notify(ERROR) << e.what() << endl;
        return NULL;
    }

    PerformStandardBrokering(targetlist);

    JobSubmission_bq submit(*xrsl, targetlist, false);

    if (args->automatic) {

        available_cpus = 0;

        for (list<Target>::iterator ti = targetlist.begin();
                ti != targetlist.end(); ++ti) {
            notify(INFO) << "Cl "
                         << ti->cluster.hostname
                         << " ("
                         << ti->cluster.used_cpus
                         << "/"
                         << ti->cluster.total_cpus
                         << ")" << endl;
            available_cpus += ti->cluster.total_cpus - ti->cluster.used_cpus;
        }

        notify(INFO) << "Total available CPUs: " << available_cpus << endl;
    }
    else {
        available_cpus = 1;
    }

    string JobName = "unknown";

    if (xrsl->IsRelation("jobname")) {
        JobName = xrsl->GetRelation("jobname").GetSingleValue();
    }

    // Attempt to submit.  After (successful) submission, collect the
    // possible bad queues to avoid spending excessive time in trying to
    // submit to them again.
    //
    // jobID starts as NULL and is deleted in the handlers, so the URL is
    // not leaked if something throws after it has been allocated.
    URL *jobID = NULL;
    try {
        jobID = new URL(submit.Submit(20));
        list<QueueName>::iterator qi;
        for (qi = submit.badQueues.begin();
                qi != submit.badQueues.end();
                ++qi) {
            BQEntry * badq = new BQEntry(qi->clustername,
                                         qi->queuename,
                                         BAD_TIMEOUT);
            badqueues.push_back(badq);
        }
        submit.RegisterJobsubmission(queuelist);

    } catch (const JobSubmissionError &e) {
        notify(WARNING) << "Error: " << e.what() << endl;
        delete jobID;
        return NULL;
    }
    catch (const XrslError &e) {
        notify(WARNING) << "Error: " << e.what() << endl;
        delete jobID;
        return NULL;
    }

    AddJobID(jobID->str(), JobName);

    notify(INFO) << "Submitted job " << *jobID << endl;

    return jobID;
}

/**
 * Query the status of every job in current_jobs and act on it.
 *
 *  - FINISHED without errors: the results are fetched with FetchJob() and
 *    the job is removed from current_jobs (and its JobStatus released).
 *  - FINISHED with errors, or FAILED: HandleJobFailure() with nzexit.
 *  - INLRMS:R: the job is running, nothing to do.
 *  - Same status as on the previous poll for longer than args->s_timeout
 *    seconds: HandleJobFailure() with the matching stall reason.
 *
 * A job whose status comes back empty is treated as being in the pseudo
 * state "NOTFOUND".
 */
void GridJM::UpdateJobStatus() {

    /* Get current jobids as a list of strings */
    list<string> jobstrs;
    for (map<string,JobStatus*>::iterator jsi = current_jobs.begin();
            jsi != current_jobs.end(); ++jsi)
        jobstrs.push_back(jsi->second->getJobid()->str());

    /* Perform query for the list */
    list<Job> joblist = GetJobInfo(jobstrs);

    for (list<Job>::iterator jli = joblist.begin();
        jli != joblist.end(); ++jli) {

        string status = "";
        string error = "";

        if (jli->status == "") {
            notify(INFO) << "Job " << jli->id << " is sent but can't be seen (yet)" << endl;
            status = "NOTFOUND";
            error = "";
        }
        else {
            status = jli->status;
            error = jli->errors;
        }

        // Find the job from current_jobs
        JobStatus * current_js = NULL;
        for (map<string,JobStatus*>::iterator jsi = current_jobs.begin();
                jsi != current_jobs.end(); ++jsi) {
            if (strcmp(jsi->second->getJobid()->str().c_str(),
                        jli->id.c_str()) == 0) {
                current_js = jsi->second;
                break;
            }
        }

        assert(current_js != NULL); // Supervising unsubmitted jobs

        // Take the previous state before setNewStatus() overwrites it
        string prev_status  = current_js->getPrevStatus();
        time_t prev_time    = current_js->getPrevTime();
        time_t current_time = time(NULL);
        current_js->setNewStatus(status);


        notify(INFO) << "Status change " << jli->id << endl;
        notify(INFO) << "  " << prev_status << " => " << status;
        notify(INFO) << " (" << current_time - prev_time << ")" << endl;

        if (strncmp(status.c_str(),"FINISHED", 8) == 0) {

            // Check if error occurred during job execution
            if (error.empty()) {
                // Normal job exit
                notify(INFO) << "Job " << jli->id <<
                    " finished correctly and was removed from current job list"
                    << endl;
                // Fetch the job (safe, as not found
                // jobs are not in state FINISHED)
                const string jobname = current_js->getName();
                FetchJob(jli->id, jobname, args->dir, true);
                // Tell instrumentation
//                inst->simstop(current_js->getName(), successfull);
//                inst->tofile();
                current_jobs.erase(jobname);
                // The map held the only reference to the status object
                delete current_js;
                -- num_current_jobs;
            }
            // Error occurred during job execution
            else {
                notify(INFO) <<
                    "Job with id " << jli->id << " failed" << endl;
                // handle job failure here
                HandleJobFailure(current_js->getName(), nzexit);
            }
        }

        else if ((strncmp(status.c_str(), "INLRMS:R", 8) == 0)) {
            // Job is running
        }

        // Check that the job has been doing some progress in
        // grid.
        // The longest status string is MAXSTATUSLENGTH chars
        else if (strncmp(status.c_str(), prev_status.c_str(), MAXSTATUSLENGTH)
                == 0) {
            notify(INFO) << "Job with id " <<
                jli->id << " has not changed" << endl;

            if (current_time - prev_time > args->s_timeout) {
                // Job has been on the same state for
                // too long.
                failurestatus s;
                if (strncmp(status.c_str(), "INLRMS:Q", MAXSTATUSLENGTH) == 0)
                    s = queuestall;
                else if (strncmp(status.c_str(), "INLRMS:S", MAXSTATUSLENGTH) == 0)
                    s = suspendstall;
                else if (strncmp(status.c_str(), "INLRMS:E", MAXSTATUSLENGTH) == 0)
                    s = finishingstall;
                else if (strncmp(status.c_str(), "INLRMS:O", MAXSTATUSLENGTH) == 0)
                    s = lrmsotherstall;
                else if (strncmp(status.c_str(),
                            "EXECUTED", MAXSTATUSLENGTH) == 0)
                    s = execstall;
                else if (strncmp(status.c_str(),
                            "NOTFOUND", MAXSTATUSLENGTH) == 0)
                    s = disappeared;
                else
                    s = other;

                HandleJobFailure(current_js->getName(), s);
            }
        }

        else if (status.compare("FAILED") == 0) {
            cout << "Job with id " << jli->id << " failed" << endl;
            // handle job failure here
            HandleJobFailure(current_js->getName(), nzexit);
        }
    }
    return;
}

/**
 * Start downloading the results of a job in a child process.
 *
 * The child executes args->ngget with the arguments "-dir <dldir> <jobid>".
 * The parent records the child in forked_downloads (under forked_dl_lock)
 * as a PidJob built from the pid, job id, job name and the ok flag.
 *
 * If fork() fails, the error is reported and nothing is recorded, so no
 * PidJob with pid -1 ends up in the list.
 *
 * @param jobid   id of the job to fetch
 * @param jobname name of the job, stored in the PidJob
 * @param dldir   directory given to the download program
 * @param ok      stored in the PidJob as is
 */
void GridJM::FetchJob(string jobid, string jobname, string dldir, bool ok) {
    pid_t pid = fork();
    if (pid == -1) {
        perror("fork()");
        return;
    }
    if (pid == 0) {
        char* ngget = args->ngget;
        execlp(ngget, ngget, "-dir", dldir.c_str(),
                jobid.c_str(), (char *)NULL);
        perror("execlp()"); // Only return on error
        // _exit(): do not run the exit handlers or flush the stdio
        // buffers inherited from the parent
        _exit(1);
    }
    // In parent
    // Push the <pid, jobname> -pair to forked_downloads
    PidJob *pj = new PidJob(pid, jobid, jobname, ok);
    pthread_mutex_lock(forked_dl_lock);
    forked_downloads->push_back(pj);
    pthread_mutex_unlock(forked_dl_lock);
    return;
}

bool GridJM::HandleJobFailure(string jobname, failurestatus reason) {

    int fd;

    JobStatus *js = current_jobs[jobname];
    string jobid = js->getJobid()->str();

    cout << "Job " << jobid << " failed - attempting to recover" << endl;

    // Cancel job
    try {
        if (args->geterrors && (reason == nzexit)) {
            // Fetching failed jobs is not useful and causes problems
            // on the clients
            FetchJob(jobid, jobname, args->errordir, false);
            if ((fd = open(args->errclfile,
                            O_WRONLY | O_CREAT | O_APPEND,
                            S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) != -1) {
                char *tmpbuf = (char *)malloc(jobid.size() + 1);
                strncpy(tmpbuf, jobid.c_str(), jobid.size());
                tmpbuf[jobid.size()] = '\n';
                if (write(fd, tmpbuf, jobid.size()+1) == -1)
                    perror("write()");
                close(fd);
                free(tmpbuf);
            }
            else
                perror("open()");
        }


        CancelJob(jobid);
        CleanJob(jobid);
        RemoveJobID(jobid);
    }
    catch (ARCLibError e) {
        notify(ERROR) << "Cancel or clean of job " << jobid << " failed:" << endl;
        notify(ERROR) << e.what();
    }

    // Report the reason of failure if the option is set
    if (args->geterrors) {
        if ((fd = open(args->errclfile,
                        O_WRONLY | O_CREAT | O_APPEND,
                        S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) != -1) {

            // Get current local time
            char *timebuf = getLTStr();

            char *t_ptr = failureconv[reason];

            int bufsize = jobid.size() + strlen(t_ptr) + strlen(timebuf) + 3;

            char *tmpbuf = (char *)malloc(bufsize);

            // Copy current time and a space to the front
            strncpy(tmpbuf, timebuf, strlen(timebuf));
            tmpbuf[strlen(timebuf)] = ' ';

            // Copy jobid and insert a space
            strncpy(tmpbuf + strlen(timebuf) + 1, jobid.c_str(), jobid.size());
            tmpbuf[jobid.size() + strlen(timebuf) + 1] = ' ';

            // Copy reason
            strncpy(tmpbuf+jobid.size()+strlen(timebuf)+2, t_ptr, strlen(t_ptr));
            tmpbuf[jobid.size() + strlen(timebuf) + strlen(t_ptr) + 2] = '\n';
            if (write(fd, tmpbuf, bufsize) == -1)
                perror("write()");
            close(fd);
            free(timebuf);
            free(tmpbuf);
        }
        else
            perror("open()");
    }

    // Add a bad cluster to the list of bad clusters
    BCEntry * badc = new BCEntry(js->getJobid()->Host(), BAD_TIMEOUT);
    badclusters.push_back(badc);

    if (js->getAttempts() >= args->retry) {
        // If the job has been submitted more than three times
        // and still failed we throw it out
        notify(DEBUG) << "Job " << jobid << " has been re-submitted "
                 << RESUBMIT_ATTEMPTS << " times - removing it from job list";
        notify(WARNING) << "Job " << jobid <<
            " was removed from current job list, due to maximum number of submissions attempts being reached" << endl;

        // Tell instrumentation
        // state stall + deleted
        inst->simstop(jobname, failed);
        inst->tofile();
        current_jobs.erase(jobname);

        -- num_current_jobs;
        return false;
    }
    // If we get here we attempt to resubmit the job

    Xrsl* xrsl = js->getXrsl();

    URL *new_jobid = SubmitJob(xrsl);
    if (new_jobid != NULL) {
        // Record the resubmission
        inst->evchange(jobname, resubmit);

        js->newSubmission(new_jobid);
        notify(ERROR) << jobid << " was resubmitted as " << new_jobid->str() << endl;

        js->setNewStatus("RESUB");
    }

    else {
        inst->simstop(jobname, submitfail);
        inst->tofile();
        current_jobs.erase(jobname);

        -- num_current_jobs;
        notify(ERROR) << jobid << " resubmission failed";
        return false;
    }

    return true; // jobs was sumbitted again
}

// ========================================================
// BCEntry class from here
// ========================================================
/**
 * Create a bad cluster entry stamped with the current time.
 *
 * @param cl cluster name
 * @param to timeout, compared against the age of the entry in valid()
 */
BCEntry::BCEntry(string cl, int to) {
    this->insert_time = time(NULL);
    this->timeout = to;
    this->cluster = cl;
}

/**
 * Tell whether the entry is still in force.
 *
 * @return true while the time since insertion does not exceed the timeout.
 */
bool BCEntry::valid() {
    if (time(NULL) - insert_time > timeout)
        return false;   // The entry has expired
    return true;
}

// ========================================================
// BQEntry class from here
// ========================================================
/**
 * Create a bad queue entry stamped with the current time.
 *
 * @param cl cluster name
 * @param qu queue name
 * @param to timeout, compared against the age of the entry in valid()
 */
BQEntry::BQEntry(string cl, string qu, int to) {
    this->insert_time = time(NULL);
    this->timeout = to;
    this->queue = qu;
    this->cluster = cl;
}

/**
 * Tell whether the entry is still in force.
 *
 * @return true while the time since insertion does not exceed the timeout.
 */
bool BQEntry::valid() {
    if (time(NULL) - insert_time > timeout)
        return false;   // The entry has expired
    return true;
}

// ========================================================
// JobStatus class from here
// ========================================================


/**
 * Start tracking a freshly submitted job.
 *
 * The object takes over both pointers: they are deleted in the destructor.
 * The status starts as "INITIAL", stamped with the current time, with no
 * resubmission attempts recorded.
 *
 * @param xrsl  job description
 * @param jobid id of the submitted job; must not be NULL, as its host is
 *              read here
 */
JobStatus::JobStatus(Xrsl *xrsl, URL *jobid) {
    // Owned objects
    this->xrsl = xrsl;
    this->current_jobid = jobid;

    // Submission and status history
    this->attempts = 0;
    this->prev_status = "INITIAL";
    this->prev_time = time(NULL);

    // The first cluster the job was sent to
    this->clusters.push_back(jobid->Host());
}



/**
 * Release the job description and the current job id held by the object.
 */
JobStatus::~JobStatus() {
    delete this->xrsl;
    delete this->current_jobid;
}


/**
 * @return the job description; it is deleted by the destructor of this
 *         object, so the caller must not delete it.
 */
Xrsl* JobStatus::getXrsl() {
    return this->xrsl;
}


/**
 * @return the id of the current submission; it is deleted by
 *         newSubmission() and by the destructor, so the caller must not
 *         delete it.
 */
URL *JobStatus::getJobid() {
    return this->current_jobid;
}


int JobStatus::getAttempts() {

    return attempts;
}

/**
 * @return the status most recently recorded by setNewStatus(), or
 *         "INITIAL" if none has been recorded yet.
 */
string JobStatus::getPrevStatus() {
    return this->prev_status;
}

time_t JobStatus::getPrevTime() {

    return prev_time;
}

/**
 * Record the latest status of the job.
 *
 * The stored status and its timestamp are touched only when s differs
 * from the stored status within the first MAXSTATUSLENGTH characters, so
 * getPrevTime() keeps telling when the state last changed.
 *
 * @param s status reported for the job
 */
void JobStatus::setNewStatus(string s) {
    const bool same_state =
        (strncmp(prev_status.c_str(), s.c_str(), MAXSTATUSLENGTH) == 0);
    if (same_state)
        return;     // Keep the old timestamp

    prev_time = time(NULL);
    prev_status = s;
}


/**
 * Switch the object over to a new submission of the same job.
 *
 * The previous job id is deleted and new_jobid is taken over in its place;
 * the attempt counter grows by one.
 *
 * NOTE(review): the host appended to clusters is that of the PREVIOUS job
 * id, which the constructor (or the previous call) seems to have recorded
 * already -- confirm whether the host of new_jobid was meant instead.
 *
 * @param new_jobid id of the new submission
 */
void JobStatus::newSubmission(URL *new_jobid) {
    URL *old_jobid = current_jobid;

    clusters.push_back(old_jobid->Host());
    delete old_jobid;

    current_jobid = new_jobid;
    attempts += 1;
}

/**
 * @return the single value of the "jobname" relation of the job
 *         description.
 *
 * NOTE(review): presumably throws XrslError when the relation is missing,
 * as the handler around the same lookup in GridJM::HandleNewJobs
 * suggests -- confirm.
 */
string JobStatus::getName() {
    string name = xrsl->GetRelation("jobname").GetSingleValue();
    return name;
}
