MHP语言开发札记 - Blog of Mathias
Blog of Mathias Web Security & Deep Learning
MHP语言开发札记
发表于: | 分类: 技术文章 | 评论:0 | 阅读:799

MHP(Mystery Hypertext Preprocessor)语言,最初是我为了探究解释语言的运作机理,与实践一些理论知识来构思的。
这款语言的风格类似php,而且后期是支持扩展函数的。
于是我初步的设计是这样的
1.词法分析器对原始语言进行标注,并初步处理结构
2.语法分析器通过建立有限状态自动机(dfa)直接输出转化后的汇编指令
3.虚拟机(vm)通过模拟一套类x86的指令集,以及pc,sp等寄存器,来执行汇编指令
4.如果需要函数扩展,可以使用dlopen加载.so库

因为我这套语言的语法相对简单,因此直接输出了汇编
如果要实现多元运算,仍然需要进行递归下降分析,或者说把中缀式转化为后缀(逆波兰)式

大概的效果如图

1.png

粗糙的源码

#include "stdafx.h"
#include "vector"
#include "string"
#include "fstream"
#include "sstream"
#include "iostream"
#include "map"

// Classifies an operand token by its first character.
// Returns false when the token starts with an ASCII digit (a numeric literal)
// and true for everything else (variable names, but also quoted strings).
bool isparam(std::string p)
{
    const char first = p[0];
    const bool starts_with_digit = (first >= '0') && (first <= '9');
    return !starts_with_digit;
}
// Tells whether an operand token is a string literal, i.e. whether its first
// character is a double quote.
bool isstr(std::string p)
{
    return p[0] == '"';
}
// Returns a copy of `str` with every space character (' ') removed, wherever
// it occurs. Other whitespace such as tabs is left untouched.
std::string trim(std::string str)
{
    std::string kept;
    for (char ch : str)
    {
        if (ch == ' ')
        {
            continue;
        }
        kept.push_back(ch);
    }
    return kept;
}
// Integer codes for the token categories, numbered consecutively from 0.
struct Tag {
    // Statement structure.
    static const int start = 0;   // original note: only used in the template function
    static const int end = 1;
    static const int newline = 2;

    // Operations.
    static const int load = 3;
    static const int assign = 4;
    static const int equal = 5;

    // Operand and symbol kinds.
    static const int param = 6;
    static const int tlate = 7;
    static const int variable = 8;
    static const int func = 9;
    static const int op = 10;
};
// Placeholder type: declares no members.
class analyzer {};
// A small DFA that recognizes a direct function-call statement of the form
//     name(arg1,arg2,...)
// and turns it into PUSH/CALL pseudo-assembly.
//
// States: 0 = reading the function name, 1 = reading the argument list,
//         2 = accepting state (closing parenthesis seen).
//
// Characters inside a double-quoted string literal are never treated as
// syntax, so "a,b" stays one argument and "x(y)" does not start a call.
class funchandle
{
public:
    int state = 0;            // current DFA state; the accepting state is 2
    const int end = 2;        // number of the accepting state
    std::string funname = ""; // characters collected before the opening '('
    std::string param = "";   // raw text between '(' and the matching ')'

    funchandle()
    {
        funname = "";
        param = "";
    }

    // True once a complete "name(...)" has been consumed.
    bool isend()
    {
        return state == end;
    }

    // Appends the instructions for the recognized call to `as`: one
    // "PUSH <arg>" per comma-separated argument, in source order, followed by
    // "CALL <name>". A call without arguments emits only the CALL.
    void makeasm(std::vector<std::string> &as)
    {
        if (!param.empty())
        {
            std::string arg;
            bool quoted = false; // inside a string literal while splitting
            for (char ch : param)
            {
                if (ch == '"')
                {
                    quoted = !quoted;
                }
                if (ch == ',' && !quoted)
                {
                    as.push_back("PUSH " + arg);
                    arg.clear();
                }
                else
                {
                    arg += ch;
                }
            }
            // The last argument has no trailing comma, so push it explicitly.
            as.push_back("PUSH " + arg);
        }
        as.push_back("CALL " + funname);
    }

    // Feeds one source character into the automaton. Input received after the
    // accepting state has been reached is ignored.
    void input(char input)
    {
        if (input == '"')
        {
            // The quote itself is still stored below; only the parsing mode flips.
            inquote = !inquote;
        }
        if (state == 0)
        {
            if (input == '(' && !inquote)
            {
                state = 1;
            }
            else
            {
                funname += input;
            }
        }
        else if (state == 1)
        {
            if (input == ')' && !inquote)
            {
                state = 2;
            }
            else
            {
                param += input;
            }
        }
    }

private:
    bool inquote = false; // true while the input is inside a "..." literal
};
// A small DFA that recognizes assignment statements of the form
//     left=right1            (plain assignment, e.g. c=a)
//     left=right1 OP right2  (one binary operation, e.g. c=a+b)
// where OP is one of + - * /, and turns them into pseudo-assembly.
//
// States: 0 = reading the left-hand side, 1 = reading the first operand,
//         2 = reading the second operand (an operator has been seen).
//
// Characters inside a double-quoted string literal are never treated as '='
// or as an operator, so a="x-y" is a plain assignment of one string.
class opassign
{
public:
    int state = 0;      // current DFA state
    const int end = 2;  // state reached once a binary operator has been read
    std::string op;     // mnemonic of the operator: ADD, SUB, MUL or DIV
    std::string left;   // assignment target
    std::string right1; // first operand
    std::string right2; // second operand (empty for a plain assignment)

    opassign()
    {
        op = "";
        left = "";
        right1 = "";
        right2 = "";
    }

    // True when the input so far is a plain assignment such as c=a.
    bool isassign()
    {
        return state == 1;
    }

    // True when the input so far contains a binary operator, such as c=a+b.
    bool isend()
    {
        return state == end;
    }

    // Appends the instructions for the recognized statement to `as`:
    // load the first operand into AX, apply the operator with the second
    // operand (if there is one), then store AX into the target.
    void makeasm(std::vector<std::string> &as)
    {
        const bool binary = !right2.empty();
        if (binary && right1.empty())
        {
            // A missing first operand (e.g. "a=-5") is evaluated as 0 OP right2.
            as.push_back("MOV AX,0");
        }
        else
        {
            as.push_back("MOV AX," + right1);
        }
        if (binary)
        {
            as.push_back(op + " AX," + right2);
        }
        as.push_back("MOV " + left + ",AX");
    }

    // Feeds one source character into the automaton.
    void input(char input)
    {
        if (input == '"')
        {
            // The quote itself is still stored below; only the parsing mode flips.
            inquote = !inquote;
        }
        if (state == 0)
        {
            if (input == '=' && !inquote)
            {
                state = 1;
            }
            else
            {
                left += input;
            }
        }
        else if (state == 1)
        {
            if (inquote)
            {
                // Operator characters inside a string literal are plain text.
                right1 += input;
                return;
            }
            switch (input)
            {
            case '+':
                op = "ADD";
                state = 2;
                break;
            case '-':
                op = "SUB";
                state = 2;
                break;
            case '*':
                op = "MUL";
                state = 2;
                break;
            case '/':
                op = "DIV";
                state = 2;
                break;
            default:
                right1 += input;
                break;
            }
        }
        else if (state == 2)
        {
            right2 += input;
        }
    }

private:
    bool inquote = false; // true while the input is inside a "..." literal
};

// Front end of the interpreter: loads a source file, splits it into
// ';'-terminated statements and translates each statement into
// pseudo-assembly collected in `asmcode`.
class parser
{
public:
    std::vector<std::string> variables; // not used by the code in this class
    std::string sourcecode = "";        // whole file: line breaks and spaces removed
    std::vector<std::string> source;    // one entry per statement, without the ';'
    std::string tokenize = "";          // not used by the code in this class
    std::vector<std::string> asmcode;   // generated instructions, in program order

    // Reads the file `fname` into `sourcecode`, joining all lines and then
    // stripping spaces via trim(). If the file cannot be opened an error is
    // written to std::cerr and `sourcecode` stays empty.
    parser(std::string fname)
    {
        // Input-only stream: the file needs read permission only.
        std::ifstream f(fname);
        if (!f.is_open())
        {
            std::cerr << "fatal error,cannot open source file " << fname << std::endl;
            return;
        }
        std::string line;
        // Looping on the read itself ends on EOF and on any read error, and
        // std::getline on std::string has no line-length limit.
        while (std::getline(f, line))
        {
            // Drop the '\r' left over from CRLF line endings.
            if (!line.empty() && line[line.length() - 1] == '\r')
            {
                line.erase(line.length() - 1);
            }
            sourcecode += line;
        }
        sourcecode = trim(sourcecode);
    }

    // Splits `sourcecode` into statements (appended to `source`) and generates
    // the pseudo-assembly for every statement in `source` (appended to `asmcode`).
    void maketoken()
    {
        const char terminator = ';';
        std::string statement;
        for (char ch : sourcecode)
        {
            if (ch == terminator)
            {
                source.push_back(statement);
                statement.clear();
            }
            else
            {
                statement += ch;
            }
        }
        // Keep a final statement that is not terminated by ';'.
        if (!statement.empty())
        {
            source.push_back(statement);
        }

        // Each statement is fed to both automata; the function-call form takes
        // precedence over the assignment forms.
        for (const std::string &row : source)
        {
            opassign opa;
            funchandle func;
            for (char ch : row)
            {
                opa.input(ch);
                func.input(ch);
            }
            if (func.isend())
            {
                func.makeasm(asmcode);
            }
            else if (opa.isend() || opa.isassign())
            {
                opa.makeasm(asmcode);
            }
        }
    }
};
// "Everything is an object" -- placeholder type that declares no members.
class every {};
// Virtual machine that executes the pseudo-assembly instructions.
//
// Calling convention: arguments are passed on `stack`, the return value of a
// built-in is left in AX.
// Every value is stored as a std::string. String values keep their
// surrounding double quotes; numbers are stored as their decimal text.
//
// Instruction set (mnemonic -> opcode):
//   MOV 1, ADD 2, SUB 3, MUL 4, DIV 5, PUSH 6, CALL 7
class vm
{
public:
    std::vector<std::string> memory;               // program, one instruction per entry
    std::vector<std::string> stack;                // argument stack for PUSH / CALL
    std::string AX = "";                           // accumulator / return-value register
    std::map<std::string, std::string> variables;  // script variables by name
    std::map<std::string, int> orderset;           // mnemonic -> opcode
    std::map<std::string, void(vm::*)()> function; // built-in function table

    // Builds the opcode and built-in tables and loads `asmcode` as the program.
    vm(std::vector<std::string> asmcode)
    {
        orderset["MOV"] = 1;
        orderset["ADD"] = 2;
        orderset["SUB"] = 3;
        orderset["MUL"] = 4;
        orderset["DIV"] = 5;
        orderset["PUSH"] = 6;
        orderset["CALL"] = 7;
        memory = asmcode;
        // Built-ins are member functions, stored as pointers-to-member.
        function["print"] = &vm::mhp_print;
        function["sub"] = &vm::mhp_substr; // the substring built-in is named "sub"
    }

    // Built-in print: pops one value and writes it to std::cout followed by a
    // newline. Sets AX to "" on success, or to "error" if the stack is empty.
    void mhp_print()
    {
        if (stack.empty())
        {
            std::cout << "fatal error,print expects 1 argument" << std::endl;
            AX = "error";
            return;
        }
        const std::string str = stack.back();
        stack.pop_back();
        std::cout << str << std::endl;
        AX = "";
    }

    // Built-in substring. Arguments in push order: "string", start, end.
    // Leaves the characters in [start, end) of the string (without its quotes)
    // in AX; both bounds are clamped to the string. On a missing argument or a
    // non-string first argument an error is printed and AX is set to "error".
    void mhp_substr()
    {
        if (stack.size() < 3)
        {
            std::cout << "fatal error,sub expects 3 arguments" << std::endl;
            stack.clear(); // discard the incomplete argument list
            AX = "error";
            return;
        }
        // Arguments come off the stack in reverse push order.
        const std::string e = stack.back();
        stack.pop_back();
        const std::string s = stack.back();
        stack.pop_back();
        std::string str = stack.back();
        stack.pop_back();

        if (isstr(str))
        {
            // Strip the first and last character (the quotes).
            str = (str.length() >= 2) ? str.substr(1, str.length() - 2) : std::string();
        }
        else if (isparam(str))
        {
            str = lookup(str);
        }
        else
        {
            std::cout << "Type error,paramter1 must be string or variable" << std::endl;
            AX = "error";
            return;
        }

        const int length = static_cast<int>(str.length());
        int start = parse_int(s);
        int end = parse_int(e);
        if (end > length) end = length;
        if (end < 0) end = 0;
        if (start < 0) start = 0;
        if (start > end) start = end;
        AX = str.substr(static_cast<std::size_t>(start), static_cast<std::size_t>(end - start));
    }

    // Executes a single instruction of the form "OP p1" or "OP p1,p2".
    // Unknown mnemonics are ignored.
    void eval(std::string code)
    {
        std::string op;
        std::string p1;
        std::string p2;
        int field = 0;       // 0: mnemonic, 1: first operand, 2: second operand
        bool quoted = false; // inside a string literal in the first operand
        for (std::size_t i = 0; i < code.length(); ++i)
        {
            const char ch = code[i];
            if (field == 0)
            {
                if (ch != ' ') op += ch;
                else field = 1;
            }
            else if (field == 1)
            {
                if (ch == '"') quoted = !quoted;
                // A comma inside a string literal belongs to the operand.
                if (ch == ',' && !quoted) field = 2;
                else p1 += ch;
            }
            else
            {
                p2 += ch;
            }
        }

        const auto found = orderset.find(op);
        if (found == orderset.end())
        {
            return;
        }
        switch (found->second)
        {
        case 1: // MOV
            exec_mov(p1, p2);
            break;
        case 2: // ADD
        case 3: // SUB
        case 4: // MUL
        case 5: // DIV
            exec_arith(found->second, p2);
            break;
        case 6: // PUSH
            exec_push(p1);
            break;
        case 7: // CALL
            exec_call(p1);
            break;
        default:
            break;
        }
    }

    // Executes the loaded program from the first instruction to the last.
    void run()
    {
        for (std::size_t i = 0; i < memory.size(); ++i)
        {
            eval(memory[i]);
        }
    }

private:
    // Value of variable `name`, or "" when it has never been assigned.
    // Does not create an entry for unknown names.
    std::string lookup(const std::string &name) const
    {
        const auto it = variables.find(name);
        return it == variables.end() ? std::string() : it->second;
    }

    // Parses a leading integer from `text`; yields 0 when there is none.
    static int parse_int(const std::string &text)
    {
        std::istringstream in(text);
        int value = 0;
        if (!(in >> value)) value = 0;
        return value;
    }

    // Parses a leading number from `text`; yields 0 when there is none.
    static float parse_number(const std::string &text)
    {
        std::istringstream in(text);
        float value = 0.0f;
        if (!(in >> value)) value = 0.0f;
        return value;
    }

    // MOV dst,src: copies the value of src (AX, a string literal, a variable
    // or a numeric literal) into dst (AX or a variable).
    void exec_mov(const std::string &dst, const std::string &src)
    {
        if (!isparam(dst))
        {
            std::cout << "syntax error,constant " + dst + " could not be assigned" << std::endl;
            return;
        }
        std::string value;
        if (src == "AX")
        {
            value = AX;
        }
        else if (isstr(src))
        {
            value = src; // quotes are kept: they mark the value as a string
        }
        else if (isparam(src))
        {
            value = lookup(src);
        }
        else
        {
            value = src; // numeric literal
        }

        if (dst == "AX") AX = value;
        else variables[dst] = value;
    }

    // ADD/SUB/MUL/DIV AX,operand: AX = AX <op> operand, computed in float.
    // String operands are rejected with an error and leave AX unchanged;
    // division by zero prints an error and sets AX to "error".
    void exec_arith(int opcode, const std::string &operand)
    {
        const std::string rhs =
            (isparam(operand) && !isstr(operand)) ? lookup(operand) : operand;
        if (isstr(rhs))
        {
            std::cout << "parser error,string " + operand + " is not arithmetic" << std::endl;
            return;
        }
        if (isstr(AX))
        {
            std::cout << "parser error,string AX is not arithmetic" << std::endl;
            return;
        }

        const float left = parse_number(AX);
        const float right = parse_number(rhs);
        float result = 0.0f;
        switch (opcode)
        {
        case 2:
            result = left + right;
            break;
        case 3:
            result = left - right;
            break;
        case 4:
            result = left * right;
            break;
        default: // 5: DIV
            if (right == 0.0f)
            {
                std::cout << "runtime error,division by zero" << std::endl;
                AX = "error";
                return;
            }
            result = left / right;
            break;
        }
        std::ostringstream out;
        out << result;
        AX = out.str();
    }

    // PUSH operand: pushes a string or numeric literal as written, or the
    // current value of a variable.
    void exec_push(const std::string &operand)
    {
        if (isstr(operand)) stack.push_back(operand);
        else if (isparam(operand)) stack.push_back(lookup(operand));
        else stack.push_back(operand);
    }

    // CALL name: invokes the built-in registered under `name`, then prints
    // the return value held in AX.
    void exec_call(const std::string &name)
    {
        const auto entry = function.find(name);
        if (entry == function.end() || entry->second == nullptr)
        {
            std::cout << "fatal error,no such function named " + name << std::endl;
            return;
        }
        // Pointer-to-member call syntax: the target object must be named explicitly.
        (this->*(entry->second))();
        std::cout << "返回值为:" + AX << std::endl;
    }
};
// Entry point: compiles the script "a.mhp" to pseudo-assembly and runs it on
// the virtual machine.
int main()
{
    const std::string script = "a.mhp";

    // Front end: load the file and generate the instruction list.
    parser analysis(script);
    analysis.maketoken();

    // Back end: execute the generated instructions.
    vm machine(analysis.asmcode);
    machine.run();

    return 0;
}

还不快抢沙发

添加新评论