Seuls les membres ayant 30 points peuvent parler sur le chat.

Forum Casio - Projets de programmation


Index du Forum » Projets de programmation » Mandelbrot Generator
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Mandelbrot Generator

Posté le 15/06/2019 10:38

Hello allô

Default 1x zoom takes 7sec
Max zoom takes around 5-10min
It has a max zoom of 2^50: over one Quadrillion!
Going over 2^48 can be rather buggy
This is because numbers are limited to the 8 byte double variables

Attached file is both SH4 and SH3 compatible: MANDEL.G1A

This needs the 'MonochromeLib' library; the required parts of it are now included in the code below

Controls
[-] Zoom out
[+] Zoom in
[F1] Hide/show HUD which contains Cords, Zoom level and Max Iterations. (Heads Up Display)
[F2] Changes colours of camera rectangle: Black, White & Inverted
[AC] Resets screen back to default state
[EXE] Draw set
[EXIT] Stop drawing the Mandelbrot (If it's taking too long)
[MENU] Return to the menu screen
[REPLAY] Move camera rectangle around (Arrow Keys: [LEFT], [RIGHT], [UP], [DOWN])

How can I optimize this code to run faster or zoom in further?

#include "fxlib.h"
#include "stdio.h"

#define TRUE 1
#define FALSE 0

//Calling ML_vram_adress() casts the sc0135 word array to a function pointer and calls it;
//the returned char* is used throughout this file as the base of the 16-bytes-per-row VRAM buffer
#define ML_vram_adress (*(sc_cpv)sc0135)

//Drawing modes accepted by the ML_* drawing functions below (ML_TRANSPARENT has no case in any of them, so it draws nothing)
typedef enum { ML_TRANSPARENT = -1, ML_WHITE, ML_BLACK, ML_XOR, ML_CHECKER } ML_Color;
//Type of a parameterless function returning char* (what sc0135 is called as)
typedef char* (*sc_cpv)(void);
//Raw machine-code words executed through ML_vram_adress
//NOTE(review): presumably the usual fx-9860G syscall trampoline for syscall 0x0135 - confirm
const unsigned int sc0135[] = { 0xD201D002, 0x422B0009, 0x80010070, 0x0135 };

unsigned int key;                //key code filled in by GetKey() in AddIn_main
int kcode1, kcode2;                //the two key codes written by Bkey_GetKeyWait()
char unused;                    //only exists to be passed as the last argument of Bkey_GetKeyWait(); never read
unsigned short dispX, dispY;    //pixel loop counters of the render loop; stop() overwrites them to abort a render

//Clears the whole VRAM buffer (64 rows * 16 bytes = 1024 bytes) to 0.
//The buffer is cleared as: up to 3 leading bytes (until the address is 4-byte aligned),
//then whole 32bit words, then whatever bytes are left over.
//The word count is derived from the buffer size, so a buffer that is already aligned
//is cleared completely too (a fixed 255 words + 'vram & 3' tail bytes stops 4 bytes short in that case).
void ML_clear_vram() {
    int i, head, words, tail, vram_size = 64 * 16, * pointer_long, vram;
    char* pointer_byte;
    vram = (int)ML_vram_adress();

    //bytes needed to reach the next 4-byte boundary: 0 when already aligned, else 1-3
    head = (4 - (vram & 3)) & 3;
    pointer_byte = (char*)vram;
    for (i = 0; i < head; i++) pointer_byte[i] = 0;

    //aligned middle part, one 32bit store per 4 bytes
    words = (vram_size - head) >> 2;
    pointer_long = (int*)(vram + head);
    for (i = 0; i < words; i++) pointer_long[i] = 0;

    //remaining 0-3 bytes after the last whole word
    tail = vram_size - head - (words << 2);
    pointer_byte += head + (words << 2);
    for (i = 0; i < tail; i++) pointer_byte[i] = 0;
}

//Sends the complete VRAM buffer to the LCD: 64 rows of 16 bytes each.
//For every row the same register sequence is issued: register 4 <- row | 192, register 4 <- 0,
//then register 7 is selected and the 16 bytes of that row are written to the data port.
void ML_display_vram() {
    char* reg_select = (char*)0xB4000000;    //LCD register selector port
    char* reg_data = (char*)0xB4010000;        //LCD data port
    char* src = ML_vram_adress();
    int row = 0;

    while (row < 64) {
        int remaining = 16;

        *reg_select = 4;
        *reg_data = row | 192;
        *reg_select = 4;
        *reg_data = 0;
        *reg_select = 7;
        while (remaining-- > 0)
            *reg_data = *src++;

        row++;
    }
}

//Sends one single VRAM row (16 bytes) to the LCD.
//Uses the same register sequence as ML_display_vram() but only for 'row', so it is much cheaper
//than refreshing the entire screen. 'row' is not range checked.
void ML_display_vram_row(int row) {
    char* reg_select = (char*)0xB4000000;    //LCD register selector port
    char* reg_data = (char*)0xB4010000;        //LCD data port
    char* src = ML_vram_adress() + (row << 4);    //16 bytes per row
    char* row_end = src + 16;

    *reg_select = 4;
    *reg_data = row | 192;
    *reg_select = 4;
    *reg_data = 0;
    *reg_select = 7;
    while (src != row_end)
        *reg_data = *src++;
}

//Draws a horizontal line in VRAM on row y between columns x1 and x2 (inclusive, either order).
//Nothing is drawn when y is outside 0-63 or when the line lies completely left or right of the screen;
//otherwise the ends are clipped to 0-127.
//A line touches one VRAM byte (both ends in the same byte) or a first byte, a last byte
//and any number of full bytes in between; the most significant bit of a byte is the leftmost pixel.
void ML_horizontal_line(int y, int x1, int x2, ML_Color color) {
    int i;
    int first, last;        //byte index (within the row) of the left and right end
    int lead, trail;        //bit position (0-7 from the left) of the left and right end
    int multi;                //non-zero when the line spans more than one byte
    char checker;
    char* line;
    char* vram = ML_vram_adress();

    if (y & ~63)
        return;
    if (x1 < 0 && x2 < 0)
        return;
    if (x1 > 127 && x2 > 127)
        return;

    if (x1 > x2) {
        i = x1;
        x1 = x2;
        x2 = i;
    }
    if (x1 < 0)
        x1 = 0;
    if (x2 > 127)
        x2 = 127;

    line = vram + (y << 4);
    first = x1 >> 3;
    last = x2 >> 3;
    lead = x1 & 7;
    trail = x2 & 7;
    multi = (first != last);

    if (color == ML_BLACK) {
        if (multi) {
            line[first] |= 255 >> lead;
            line[last] |= 255 << (7 - trail);
            for (i = first + 1; i < last; i++)
                line[i] = 255;
        } else
            line[first] |= (255 >> (lead + 7 - trail)) << (7 - trail);
    } else if (color == ML_WHITE) {
        if (multi) {
            line[first] &= 255 << (8 - lead);
            line[last] &= 255 >> (1 + trail);
            for (i = first + 1; i < last; i++)
                line[i] = 0;
        } else
            line[first] &= (255 << (8 - lead)) | (255 >> (1 + trail));
    } else if (color == ML_XOR) {
        if (multi) {
            line[first] ^= 255 >> lead;
            line[last] ^= 255 << (7 - trail);
            for (i = first + 1; i < last; i++)
                line[i] ^= 255;
        } else
            line[first] ^= (255 >> (lead + 7 - trail)) << (7 - trail);
    } else if (color == ML_CHECKER) {
        //alternating bit pattern, shifted by one pixel on odd rows
        checker = (y & 1 ? 85 : 170);
        if (multi) {
            //clear the covered pixels first, then OR the pattern in
            line[first] &= 255 << (8 - lead);
            line[last] &= 255 >> (1 + trail);
            line[first] |= checker & (255 >> lead);
            line[last] |= checker & (255 << (7 - trail));
            for (i = first + 1; i < last; i++)
                line[i] = checker;
        } else {
            line[first] &= (255 << (8 - lead)) | (255 >> (1 + trail));
            line[first] |= checker & ((255 >> (lead + 7 - trail)) << (7 - trail));
        }
    }
}

//Draws a vertical line in VRAM on column x between rows y1 and y2 (inclusive, either order).
//Nothing is drawn when x is outside 0-127 or when the line lies completely above or below the screen;
//otherwise the ends are clipped to 0-63.
void ML_vertical_line(int x, int y1, int y2, ML_Color color) {
    int row, col;
    char bit, inverse, * vram = ML_vram_adress();

    if (x & ~127) return;
    if (y1 < 0 && y2 < 0) return;
    if (y1 > 63 && y2 > 63) return;

    if (y1 > y2) {
        int swap = y1;
        y1 = y2;
        y2 = swap;
    }
    if (y1 < 0) y1 = 0;
    if (y2 > 63) y2 = 63;

    col = x >> 3;                    //byte within a row that holds column x
    bit = 128 >> (x & 7);            //mask of the pixel inside that byte
    inverse = ~(128 >> (x & 7));    //every bit except that pixel

    if (color == ML_BLACK) {
        for (row = y1; row <= y2; row++)
            vram[(row << 4) + col] |= bit;
    } else if (color == ML_WHITE) {
        for (row = y1; row <= y2; row++)
            vram[(row << 4) + col] &= inverse;
    } else if (color == ML_XOR) {
        for (row = y1; row <= y2; row++)
            vram[(row << 4) + col] ^= bit;
    } else if (color == ML_CHECKER) {
        //pixel is cleared where row and column parity differ, set where they match
        for (row = y1; row <= y2; row++) {
            if ((row ^ x) & 1) vram[(row << 4) + col] &= ~bit;
            else vram[(row << 4) + col] |= bit;
        }
    }
}

//Draws a single pixel at (x, y) in VRAM. Coordinates outside 0-127 / 0-63 are ignored.
void ML_pixel(int x, int y, ML_Color color) {
    char* cell;        //VRAM byte containing the pixel
    int bit;        //mask of the pixel inside that byte (leftmost pixel = 128)
    char* vram = ML_vram_adress();

    if (x & ~127 || y & ~63) return;

    cell = vram + (y << 4) + (x >> 3);
    bit = 128 >> (x & 7);

    if (color == ML_BLACK)
        *cell |= bit;
    else if (color == ML_WHITE)
        *cell &= ~bit;
    else if (color == ML_XOR)
        *cell ^= bit;
    else if (color == ML_CHECKER) {
        //cleared where row and column parity differ, set where they match
        if ((y & 1) ^ (x & 1)) *cell &= ~bit;
        else *cell |= bit;
    }
}

//Returns n / x^p by repeated division (p > 0) or n * x^(-p) by repeated multiplication (p < 0).
//p == 0 returns n unchanged. Works on doubles so results beyond the 32bit int range are possible.
double divByPow(double n, double x, int p) {
    while (p < 0) {        //negative exponent: multiply |p| times
        n *= x;
        p++;
    }
    while (p > 0) {        //positive exponent: divide p times
        n /= x;
        p--;
    }
    return n;
}

//Timer callback installed by AddIn_main while a render is running.
//Polls the keyboard; on key code pair (4, 8) or (4, 9) - [EXIT] / [MENU] according to the original comment -
//it pushes both render loop counters past their limits so the loops in AddIn_main terminate.
void stop(void) {
    if (!Bkey_GetKeyWait(&kcode1, &kcode2, 1, 0, 1, &unused))
        return;                //no key reported
    if (kcode1 != 4)
        return;
    if (kcode2 != 8 && kcode2 != 9)
        return;

    dispX = 128;            //ends the column loop
    dispY = 64;                //ends the row loop
}

int AddIn_main(int isAppli, unsigned short OptionNum) {        //Main function
    unsigned int graphZoom = 1;                //zoom level for graph
    char screenZoom;                        //zoom level on screen (rectangle)
    int screenX1, screenX2;                    //corner X cords for drawing rectangle to screen
    int screenY1, screenY2;                    //corner Y cords for drawing rectangle to screen
    unsigned char string[1];                //Used in converting int/double to char
    char HUD = TRUE;                        //Heads Up Display: Cords, Zoom level & Max iteration: toggle with [F1]
    char colour = ML_XOR;                    //Colour of rectangle: Black, White or Inverted
    int screenX, screenY;                    //offset cords on screen from 0,0 for rectangle
    double graphX = 0, graphY = 0;            //cords on graph - where to center mandelbrot
    double graphMove;                        //amount graphX & Y changes by when moving rectangle around
    int screenMove;                            //amount screenX & Y changes by when moving rectangle around with arrow keys
    short tempPixel = 0;                    //Write pixels to temp variable then write the entire 2bytes to VRAM all at once

    register double zr, zi;                    //zr is real, zi imaginary
    register double zr2, zi2;                //zr2 = zr^2, zi2 = zi^2
    register double x1 = -2.0;                //bounding box cords on graph
    register double x2 = 2.0;                //bounding box cords on graph
    register double y1 = -1.0;                //bounding box cords on graph
    register double y2 = 1.0;                //bounding box cords on graph
    register double x, y;                    //pixel cords on graph tested if in set
    register double xIsz, yIsz;                //amount x/y increases by when ploting graph
    register unsigned short iMax = 32;        //max iterations
    register unsigned short i;                //iterations

    while (TRUE) {
        register char* vram = ML_vram_adress();

        SetTimer(1, 200, stop);
        ML_clear_vram();
        ML_display_vram();

        xIsz = (x2 - x1) / 128;
        yIsz = (y2 - y1) / 64;

        y = y1;
        for (dispY = 0; dispY < 64; dispY++) {
            x = x1;
            y += yIsz;
            for (dispX = 0; dispX < 128; dispX++) {
                zr = x;
                zi = y;
                for (i = 0; i < iMax; i++) {
                    zr2 = zr * zr;
                    zi2 = zi * zi;
                    if (zr2 + zi2 > 4)
                        break;
                    zi = zr * zi;
                    zi += zi + y;
                    zr = zr2 - zi2 + x;
                }
                tempPixel = (tempPixel << 1) | (i == iMax);
                if ((dispX & 7) == 7)
                    *vram++ = tempPixel;
                x += xIsz;
            }
            ML_display_vram_row(dispY);
        }
        SaveDisp(1);
        KillTimer(1);
        screenX = 0;
        screenY = 0;
        screenZoom = 1;
        Bkey_GetKeyWait(&kcode1, &kcode2, 2, 1, 1, &unused);
        do {
            GetKey(&key);
            screenMove = screenZoom > 4 ? 1 : divByPow(16, 2, screenZoom);
            graphMove = screenZoom > 4 ? divByPow(1, 2, graphZoom - (double)screenZoom) : divByPow(16, 2, graphZoom);
            switch (key) {
                case KEY_CHAR_PLUS:
                    if (graphZoom < 51) {
                        graphZoom++;
                        screenZoom++;
                    }
                    break;
                case KEY_CHAR_MINUS:
                    if (graphZoom) {
                        graphZoom--;
                        screenZoom--;
                    }
                    break;
                case KEY_CTRL_UP:
                    screenY -= screenMove;
                    graphY -= graphMove;
                    break;
                case KEY_CTRL_DOWN:
                    screenY += screenMove;
                    graphY += graphMove;
                    break;
                case KEY_CTRL_LEFT:
                    screenX -= screenMove;
                    graphX -= graphMove;
                    break;
                case KEY_CTRL_RIGHT:
                    screenX += screenMove;
                    graphX += graphMove;
                    break;
                case KEY_CTRL_F1:
                    HUD = !HUD;
                    break;
                case KEY_CTRL_F2:
                    if (colour)
                        colour--;
                    else
                        colour = ML_XOR;
                    break;
                case KEY_CTRL_F3:
                    //Gray scale, by refreshing screen multiple times per sec at different max iterations (iMax)
                    break;
                case KEY_CTRL_AC:
                    graphZoom = 1;
                    graphX = 0;
                    graphY = 0;
                    screenZoom = 1;
                    screenX = 0;
                    screenY = 0;
                    key = KEY_CTRL_EXE;
                    break;
            }
            RestoreDisp(1);
            iMax = 8 * (graphZoom + 3);

            if (screenZoom < 8) {
                screenX1 = 65 - divByPow(128, 2, screenZoom) + screenX;
                screenX2 = 62 + divByPow(128, 2, screenZoom) + screenX;
                screenY1 = 32 - (screenZoom > 6 ? 1 : divByPow(64, 2, screenZoom)) + screenY;
                screenY2 = 31 + (screenZoom > 6 ? 0 : divByPow(64, 2, screenZoom)) + screenY;
                ML_horizontal_line(screenY1, screenX1, screenX2, colour);
                ML_horizontal_line(screenY2, screenX1, screenX2, colour);
                ML_vertical_line(screenX1 - 1, screenY1, screenY2, colour);
                ML_vertical_line(screenX2 + 1, screenY1, screenY2, colour);
            } else
                ML_pixel(screenX + 64, screenY + 31, colour);

            x1 = divByPow(-4, 2, graphZoom) + (0.03125 * graphX);
            x2 = divByPow(4, 2, graphZoom) + (0.03125 * graphX);
            y1 = divByPow(-2, 2, graphZoom) + (0.03125 * graphY);
            y2 = divByPow(2, 2, graphZoom) + (0.03125 * graphY);

            if (HUD == TRUE) {
                sprintf(&string, "X1:%f", x1);
                PrintMini(0, 0, string, 0);
                sprintf(&string, "Y1:%f", y1);
                PrintMini(0, 6, string, 0);
                sprintf(&string, "X2:%f", x2);
                PrintMini(81, 53, string, 0);
                sprintf(&string, "Y2:%f", y2);
                PrintMini(81, 59, string, 0);
                sprintf(&string, "MaxI:%u", iMax);
                PrintMini(0, 53, string, 0);
                if (graphZoom > 32)
                    sprintf(&string, "Zoom:2^%ux", graphZoom - 1);
                else
                    sprintf(&string, "Zoom:%ux", (int)divByPow(1, 2, -graphZoom + 1));
                PrintMini(0, 59, string, 0);
            }

            ML_display_vram();

        } while (key != KEY_CTRL_EXE);
    }
    return 0;
}


//BR_Size is placed in its own section named _BR_Size; nothing in this file reads or writes it
//NOTE(review): presumably required by the add-in loader / SDK linker script - confirm
#pragma section _BR_Size
unsigned long BR_Size;
#pragma section
//InitializeSystem is placed in section _TOP and only forwards both arguments to INIT_ADDIN_APPLICATION
//NOTE(review): looks like the standard SDK start-up stub; keep the pragmas in this order
#pragma section _TOP
int InitializeSystem(int isAppli, unsigned short OptionNum) {
    return INIT_ADDIN_APPLICATION(isAppli, OptionNum);
}
#pragma section


Fichier joint


Pages : Précédente1, 2, 3, 4, 5
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 16/11/2019 03:46 | # | Fichier joint


oops
I removed it in code, but forgot to recompile

Once I have got the 64bit Fixed Point assembly code all working
I'll start adding more features
- Gray Scale
- Higher zoom level
- Faster rendering
- Customizable Iterations
- Customizable HUD
- Julia Set

MANDEL.G1A

RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
Mrvoxy
Statut : Invité

Citer : Posté le 16/11/2019 04:08 | #


Wow! I eagerly wait!
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 16/11/2019 08:54 | #


Wow. I tried that with full overclock, that's some stunning results right there! Deep full-screen images take about 10s and the overall fractal is drawn in about 1s!

I know it would be slower on fx-CG 50 due to the larger screen (9× more pixels to draw), but the colors, ah... x)
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 16/11/2019 10:10 | #


I'm not sure how to format the sign of the numbers
0x07000000 07000000 converted to negative becomes 0xF8FFFFFF F9000000
but the negative output of the multiplier (or subtractors) is 0xF9000000 F9000000, this is because after inverting the number, it adds +1 to both limbs of the whole number, rather than just the lowest limb
I'm having huge problems with the inputs needing to be 'normal' negative numbers, but everything outputs the double +1 numbers
But if everything is kept positive (multiplier and adder, no subtractors) it's all fine
I'm confused that other people online don't seem to have this problem; I wonder if it just doesn't matter, or if they are using other methods that avoid it
RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 16/11/2019 11:00 | #


It might be useful to remember the negation identity: -x = ~x + 1.

This is true whatever the size of the integer is. Here you are dealing with 64-bits, but it works just as well as with 32-bit integers. The main change is that you need to use carry-aware instructions to propagate changes from one half to the other.

Just as with addc, there is a negc instruction which does what you need. In fact, the example usage of addc in the manual is exactly this operation.

Negating r0,r1
Before: r0,r1 = 00000000,00000001
After:  r0,r1 = ffffffff,ffffffff

# 64-bit negate of the pair r0 (high word) : r1 (low word), i.e. r0:r1 = 0 - r0:r1
clrt                # T = 0, so the first negc is a plain 0 - r1
negc r1, r1         # low word:  r1 = 0 - r1 - T, T = borrow out
negc r0, r0         # high word: r0 = 0 - r0 - T, takes the borrow from the low word
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 21/11/2019 08:33 | #


The reason why no one else had problems with negative numbers is because
1. They didn't bother, and their design wouldn't work with negatives
or 2. They detected if the number is negative, made the number positive, then re-added the sign at the end
so that's what I did

some half Optimized code
;-----------------------------------------------------------------------
; _mul64 - signed 64bit x 64bit fixed point multiply
; Called as: mul64(x1, x0, y1, y0, &high, &mid, &low, &below)
; Format: 1:7:56 (Sign:Int:Frac), each number split into a high and a low 32bit word
;
; In:    r4 = x1 (high word of x)       r5 = x0 (low word of x)
;        r6 = y1 (high word of y)       r7 = y0 (low word of y)
;        stack at entry: @(0) = &high, @(4) = &mid, @(8) = &low, @(12) = &below
; Out:   *high : *mid = bits 119..56 of |x| * |y|, negated when exactly one input is negative
;        (the same two words are left in r0 : r1)
;        &low and &below are accepted but nothing is stored through them
; Clobb: r0-r2, r4-r7, T, MACH, MACL        (r8-r11 are saved and restored)
;
; NOTE: this listing previously appeared twice, the second copy glued onto the delay slot
;       line and labelled _mul64Optimized, which duplicated every label. Only one copy is
;       kept, and the branch targets are now unique to this routine.
;-----------------------------------------------------------------------
_mul64:
;  12 * 34      =     10*30 +  2*30 + 10*4  +  2*4   =  300 + 60 + 40 + 8  =     408
;x1x0 * y1y0  =     x1*y1 + x0*y1 + x1*y0 + x0*y0

    mov.l   r8,         @-r15
    mov.l   r9,         @-r15       ;r9 is not used below; kept so the stack offsets stay the same
    mov.l   r10,        @-r15
    mov.l   r11,        @-r15
    mov     #1,         r1          ;r1  = 1 (sign toggle)
    mov     #0,         r10         ;r10 = 0 (constant zero)
    mov     #0,         r11         ;r11 = 1 when the result has to be negated

;x = |x|
    cmp/ge  r10,        r4          ;T = (x1 >= 0)
    bt      mul64_x_abs
    negc    r5,         r5          ;T is 0 here (branch not taken), so this is a plain 64bit negate
    negc    r4,         r4
    xor     r1,         r11
mul64_x_abs:

;y = |y|
    cmp/ge  r10,        r6          ;T = (y1 >= 0)
    bt      mul64_y_abs
    negc    r7,         r7
    negc    r6,         r6
    xor     r1,         r11
mul64_y_abs:

;128bit product is accumulated in r0 : r1 : r2 : (lowest word, dropped)
    mov     #0,         r1

    dmulu.l r5,         r7          ;x0 * y0
    sts     mach,       r2          ;only its high word is needed

    clrt
    dmulu.l r5,         r6          ;x0 * y1
    sts     macl,       r8
    addc    r8,         r2
    sts     mach,       r8
    addc    r8,         r1
    movt    r0                      ;r0 = carry out of r1

    clrt
    dmulu.l r4,         r7          ;x1 * y0
    sts     macl,       r8
    addc    r8,         r2
    sts     mach,       r8
    addc    r8,         r1
    addc    r10,        r0          ;r0 += carry

    clrt
    dmulu.l r4,         r6          ;x1 * y1
    sts     macl,       r8
    addc    r8,         r1
    sts     mach,       r8
    addc    r8,         r0

;the product has 112 fraction bits, 56 are wanted: take the 64 bits starting 8 bits below the top
                                    ;   XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
                                    ;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
    mov     #8,         r8          ;8            //Left
    shld    r8,         r0          ;    XXXXXX
    mov     r1,         r5
    shld    r8,         r1          ;    YYYYYY

    mov     #-24,       r8          ;8-32        //Right
    shld    r8,         r5          ;YY
    shld    r8,         r2          ;ZZ

    add     r5,         r0          ;r0 = XXXXXXYY
    add     r2,         r1          ;r1 = YYYYYYZZ

;put the sign back
    cmp/eq  r10,        r11         ;T = (no negation needed)
    bt      mul64_store
    negc    r1,         r1          ;T is 0 here (branch not taken)
    negc    r0,         r0
mul64_store:

    mov.l   @(16,r15),  r4          ;&high (4 registers pushed = 16 bytes above the entry stack)
    mov.l   @(20,r15),  r5          ;&mid
    mov.l   r0,         @r4         ;High
    mov.l   r1,         @r5         ;Mid
    mov.l   @r15+,      r11
    mov.l   @r15+,      r10
    mov.l   @r15+,      r9
    rts
    mov.l   @r15+,      r8          ;delay slot: executed before the return completes


some optimized and very unreadable code
;-----------------------------------------------------------------------
; _mul64Optimized - same contract as _mul64, with the register saves and loads moved
;                   into branch delay slots and between the multiplies
; Called as: mul64Optimized(x1, x0, y1, y0, &high, &mid, &low, &below)
; Format: 1:7:56 (Sign:Int:Frac), each number split into a high and a low 32bit word
;
; In:    r4 = x1 (high word of x)       r5 = x0 (low word of x)
;        r6 = y1 (high word of y)       r7 = y0 (low word of y)
;        stack at entry: @(0) = &high, @(4) = &mid, @(8) = &low, @(12) = &below
; Out:   *high : *mid = bits 119..56 of |x| * |y|, negated when exactly one input is negative
;        (the same two words are left in r0 : r1); &low and &below are never written
; Clobb: r0-r2, r4-r7, T, MACH, MACL        (r8-r11 are saved and restored)
;
; The branch targets carry a routine specific prefix so they cannot collide with the
; labels of _mul64 when both routines are assembled from the same source file.
; The instruction order is deliberate - do not regroup it without re-checking T and MACH/MACL.
;-----------------------------------------------------------------------
_mul64Optimized:
;  12 * 34      =     10*30 +  2*30 + 10*4  +  2*4   =  300 + 60 + 40 + 8  =     408
;x1x0 * y1y0  =     x1*y1 + x0*y1 + x1*y0 + x0*y0

    mov.l   r11,        @-r15
    mov     #0,         r0          ;r0  = 1 when the result has to be negated
    mov     #0,         r11         ;r11 = 0 (constant zero)

;x = |x|
    cmp/ge  r11,        r4          ;T = (x1 >= 0)
    bt      mul64Opt_x_abs
    ;wasted cycle
    negc    r5,         r5          ;T is 0 here (branch not taken), so this is a plain 64bit negate
    negc    r4,         r4
    xor     #1,         r0
mul64Opt_x_abs:

;y = |y|
    cmp/ge  r11,        r6          ;T = (y1 >= 0)
    bt/s    mul64Opt_y_abs
    mov.l   r10,        @-r15       ;delay slot: runs on both paths, does not change T
    negc    r7,         r7
    negc    r6,         r6
    xor     #1,         r0
mul64Opt_y_abs:

;128bit product is accumulated in r0 : r1 : r2 : (lowest word, dropped)
;each dmulu.l is started as early as possible; the instructions after it still work on
;values that were read from MACH/MACL before that multiply was issued
    dmulu.l r5,         r7          ;x0 * y0
    mov.l   r9,         @-r15
    mov.l   r8,         @-r15
    mov     #0,         r1
    sts     mach,       r2          ;only its high word is needed

    dmulu.l r5,         r6          ;x0 * y1
    mov     r0,         r10         ;sign flag moves to r10, r0 becomes the top product word
    clrt
    ;wasted cycle
    sts     macl,       r8
    addc    r8,         r2
    sts     mach,       r8

    dmulu.l r4,         r7          ;x1 * y0
    addc    r8,         r1          ;still adding the high word of x0 * y1
    movt    r0                      ;r0 = carry out of r1
    clrt
    sts     macl,       r8
    addc    r8,         r2
    sts     mach,       r8

    dmulu.l r4,         r6          ;x1 * y1
    addc    r8,         r1          ;still adding the high word of x1 * y0
    addc    r11,        r0          ;r0 += carry
    clrt
    sts     macl,       r8
    addc    r8,         r1
    sts     mach,       r8
    addc    r8,         r0

;the product has 112 fraction bits, 56 are wanted: take the 64 bits starting 8 bits below the top
                                    ;   XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
                                    ;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
    mov     #8,         r8          ;8            //Left
    shld    r8,         r0          ;    XXXXXX
    mov     r1,         r5
    shld    r8,         r1          ;    YYYYYY

    mov     #-24,       r8          ;8-32        //Right
    shld    r8,         r5          ;YY
    shld    r8,         r2          ;ZZ

    add     r5,         r0          ;r0 = XXXXXXYY
    add     r2,         r1          ;r1 = YYYYYYZZ

;put the sign back
    cmp/eq  r11,        r10         ;T = (no negation needed)
    bt/s    mul64Opt_store
    mov.l   @(16,r15),  r4          ;delay slot: &high (4 registers pushed = 16 bytes above the entry stack)
    negc    r1,         r1          ;T is 0 here (branch not taken)
    negc    r0,         r0
mul64Opt_store:

    mov.l   @(20,r15),  r5          ;&mid
    mov.l   r0,         @r4         ;High
    mov.l   r1,         @r5         ;Mid
    mov.l   @r15+,      r8
    mov.l   @r15+,      r9
    mov.l   @r15+,      r10
    rts
    mov.l   @r15+,      r11         ;delay slot: executed before the return completes

RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 21/11/2019 09:35 | #


Looks nice! I wonder if this could be shorter. Have you looked at the libgcc implementation? Here is how they multiply unsigned 64-bit values (where you don't have carry problems):

Notation: All product rxry must be seen as 64-bit
          .h and .l represent the high and low halves of a 64-bit value
          X is 2^32 (also looks like a polynomial)

(Xr4+r5)(Xr6+r7) = r5r7 + X(r4r7 + r5r6) + X^2(r4r6)
                 = r5r7 + X(r4r7 + r5r6)                (X^2 overflows)
                 = r5r7 + X(r4r7.l + r5r6.l)            (X * rxry.h overflows)
Output is:
  r0 = r4r7.l + r5r6.l + r5r7.h    (higher half)
  r1 = r5r7.l                      (lower half)

# Unsigned 64x64 multiply keeping only the low 64 bits of the product.
# In:  r4:r5 = first operand (high:low), r6:r7 = second operand (high:low)
# Out: r0:r1 = low 64 bits of the product (high:low); r2 is used as scratch
dmulu.l    r5,r7       # full 64-bit product of the two low words -> MACH:MACL
sts    macl,r1        # r1 = r5r7.l
sts    mach,r2         # r2 = r5r7.h
mul.l    r6,r5         # 32-bit multiply: only the low word of r5*r6 is needed
sts    macl,r0         # r0 = r5r6.l
mul.l    r7,r4         # started before the add below so the multiply overlaps it
add    r2,r0           # r0 = r5r6.l + r5r7.h
sts    macl,r2         # r2 = r4r7.l
rts    
add    r2,r0           # r0 = r4r7.l + r5r6.l + r5r7.h (runs in the delay slot of rts)

I'm mainly mentioning this because of the "analysis" of the multiplication, which allows using 32-bit multiplications at times and has no carry. If you make both operands unsigned at the beginning then add the sign at the very end, maybe you can gain on these clrt and adds everywhere?

Anyway, this already looks really good! The performance of the fixed-point version is clearly very fast!
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 21/11/2019 09:48 | #


That 64x64 only gives the lower half of the 128bit output
But I need the high half because the fixed point is very high up
That why it has normal mul.l and not dmulu.l

I had a feeling that I didn't need any of those carries, I had them there when testing signed multiplication

Ajouté le 03/01/2020 à 22:20 :
how can I create a "function" in asm?
I want to branch to the multiplication code, then have it automatically return back to the spot it first branched from

I sort of got it working with bsr, but it overwrites the value in pr,
and when I want to return back to the C code, it instead returns to just after the bsr
RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 03/01/2020 22:30 | #


Yeah bsr (you can also use jsr) overrides pr, thus you have to save pr whenever you know that you will call subfunctions.

sts.l pr, @-r15
# Call...
lds.l @r15+, pr

The calling convention is that you should also save r0-r7 if you need them later on. On the other hand, you can leave r8-r15 as is, and the called function will not overwrite them (just like pr).
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 03/01/2020 23:04 | #


Thx, works perfectly

I need to pass 4 values in and out
should I use r0-r3, r4-r7 or r8-r11?
RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 04/01/2020 07:49 | #


Parameters should go to r4 to r7 (in this order), and the return value should be r0. If you have more parameters, pass them on the stack in reverse order (fifth parameter on top, sixth below, and so on). If you have more return values, either pass pointers as parameters or return them in r0 to r3. Second option should be checked for compatibility with the C ABI.

These are just conventions, you don't absolutely need them if your code is not going to get called directly from C code, but it cannot hurt to observe them.
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 05/01/2020 09:47 | #


umm...
is there a limit on "addressable" space on the stack?
error when building
Mandelasm.src(138) : 402 (E) ILLEGAL VALUE IN OPERAND

line 138
mov.l    @(64,r15),    r2

changing 64 to 60 removes the error, but the address is 4 too low
RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 05/01/2020 10:22 | #


Yes, indeed. Check out the instruction:

MOV.L @(disp,Rm), Rn
Operation: (disp × 4 + Rm) → Rn
Code: 0101nnnnmmmmdddd

As you can see, there are only 4 bits for d, which means that you can only go from 0 to 60 in steps of 4.

But fear not, because you can just use a more powerful instruction such as this:
mov #64, r0
mov.l @(r0,r15), r2

You have to use r0 as the index here (instructions almost never have 3 parameters because this would span too many of the available 64k opcodes, so r0 is fixed).

Or you can use the Global Base Register which exists specifically for that purpose. As you can see, because the following instructions uses gbr instead of rm as base, the 4 bits dedicated to the value m are now available for d:

MOV.L @(disp,GBR), R0
Operation: (disp × 4 + GBR) → R0
Code: 11000110dddddddd

You still need to use r0, but now at least you don't have to add calculations to obtain your index. You can do the following:

# Only once
ldc r15, gbr
# Enjoy!
mov.l @(64, gbr), r0

Remember to save GBR in functions, IIRC it's callee-saved.
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 06/01/2020 10:15 | #


since I haven't been working on this from Nov 25 (due to school and other stuff). I've finally come back to this only 5 days ago
And I now have a fully working 64bit fixed point mandelbrot in asm
Gonna go to bed now; but will optimize, clean up and comment everything tomorrow

The function in C to call drawMandel in asm
drawMandel(zoom, vram, yHighStart, yLowStart, xHighStart, xLowStart, iMax);
Currently, I'm just using these params for testing
drawMandel(1, vram, -0x01000000, -0x00000000, -0x02000000, -0x00000009, 50);

drawMandel in asm
_drawMandel:                        ;drawMandel(zoom, vram, yHighStart, yLowStart, xHighStart, xLowStart, iMax);
;Renders a 128x64, 1-bit-per-pixel Mandelbrot image into the buffer at vram.
;For every pixel c = x + iy the loop iterates z = z*z + c (starting from z = c) at most iMax times
;using 64bit fixed point maths (sq64 / mul64). The pixel bit is 1 when the iteration counter
;runs out and 0 when zr*zr + zi*zi compares greater than 4.0 first.
;Eight pixel bits are collected in tempPixel and stored to vram as one byte.
;
;Arguments: r4 = zoom, r5 = vram, r6 = yHighStart, r7 = yLowStart;
;xHighStart, xLowStart and iMax are read from the caller's stack (offsets 48/52/56 from r15
;once the 12 words pushed below are on the stack).
;
;Register roles
;r0 = temp0,            row counter between rows
;r1 = temp1,            col counter between pixels
;r2 = highIsz           (pixel step, high word)
;r3 = lowIsz            (pixel step, low word)
;r4 = zoom,            xHigh,    zrHigh inside the inner loop
;r5 = vram,            xLow,    zrLow inside the inner loop
;r6 = yHighStart,    yHigh,    ziHigh inside the inner loop
;r7 = yLowStart,    yLow,    ziLow inside the inner loop
;r8 = tempPixel between pixels, copy of zrHigh inside the inner loop
;r9 = copy of zrLow inside the inner loop
;r10 = i (remaining iterations)
;r11 = zi2High
;r12 = zi2Low
;r13 = zr2High
;r14 = zr2Low
;
;Stack frame while a row is being processed (r15 points at the saved row counter):
;@(0,r15)  = row
;@(4,r15)  = vram (current output byte address)
;@(8,r15)  = lowIsz
;@(12,r15) = highIsz
;@(16,r15) = pr, @(20..44,r15) = r14..r8
;@(48,r15) = xHighStart
;@(52,r15) = xLowStart
;@(56,r15) = iMax

;64bit number format - split between two 32bit variables
;1:7:56
;Sign:Int:Frac
;±:0000000:00000000000000000000000000000000000000000000000000000000
;±0000000.000000000000000000000000,00000000000000000000000000000000
;high =    ±0000000.000000000000000000000000
;low =    00000000000000000000000000000000

    ;stc.l    gbr,        @-r15        ;gbr            PUSH!! 0
    mov.l    r8,         @-r15        ;r8                PUSH!! 1
    mov.l    r9,         @-r15        ;r9                PUSH!! 2
    mov.l    r10,        @-r15        ;r10            PUSH!! 3
    mov.l    r11,        @-r15        ;r11            PUSH!! 4
    mov.l    r12,        @-r15        ;r12            PUSH!! 5
    mov.l    r13,        @-r15        ;r13            PUSH!! 6
    mov.l    r14,        @-r15        ;r14            PUSH!! 7
    sts.l    pr,            @-r15        ;pr                PUSH!! 8    //needed because bsr overwrites pr
    ;ldc        r15,        gbr            ;gbr = r15
;r2 = highIsz    //±0000000.000000000000000000000000
;r3 = lowIsz    //00000000000000000000000000000000
;r4 = zoom        //0 - 56
    
;highIsz = 0x00100000 >> zoom;
    neg        r4,            r4            ;r4 = -zoom (shld shifts right for a negative count)
    mov.l    #1048576,    r2            ;highIsz = 0x00100000
    shld    r4,            r2            ;highIsz >>= zoom

;if (zoom >= 21) { lowIsz = 0x80000000 >> (zoom - 21); highIsz = 0; } else lowIsz = 0;
;NOTE(review): shld only uses 5 bits of the count, so for zoom > 53 (count below -32) the
;right shift presumably wraps instead of giving 0 - confirm against the SH shld definition.
    mov.l    #2147483648,r3            ;lowIsz = 0x80000000
    add        #21,        r4            ;r4 = 21 - zoom
    
    cmp/pl    r4                        ;T = (21 - zoom) > 0, i.e. zoom < 21
    bt/s    highBits                ;if(T == 1) branch highBits    //shld below is in the delay slot and always runs
    shld    r4,            r3            ;zoom < 21: left shift pushes the bit out (lowIsz = 0); else lowIsz >>= zoom - 21
    
    mov        #0,            r2            ;highIsz = 0 (only reached when zoom >= 21)
highBits:

    mov.l    r2,            @-r15        ;highIsz        PUSH!! 9
    mov.l    r3,            @-r15        ;lowIsz            PUSH!! 10




;Main Loops
    mov.l    r5,            @-r15        ;vram            PUSH!! 11
    mov        #0,            r8            ;tempPixel = 0
    mov        #64,        r0            ;row = 64

row:                                ;for (row = 64; row > 0; row--) {
    mov.l    r0,            @-r15        ;row            PUSH!! 12
    mov        @(48,r15),    r4            ;xHigh = xHighStart (x restarts at the left edge on every row)
    mov        @(52,r15),    r5            ;xLow  = xLowStart
    mov.l    #128,        r1            ;col = 128

col:                                ;for (col = 128; col > 0; col--) {
    mov        @(56,r15),    r10            ;i = iMax
    mov.l    r6,            @-r15        ;yHigh            PUSH!! 13
    mov.l    r7,            @-r15        ;yLow            PUSH!! 14
    mov.l    r4,            @-r15        ;xHigh            PUSH!! 15
    mov.l    r5,            @-r15        ;xLow            PUSH!! 16
    mov.l    r1,            @-r15        ;col            PUSH!! 17
    mov.l    r8,            @-r15        ;tempPixel        PUSH!! 18
;Stack inside the inner loop: @(0)=tempPixel @(4)=col @(8)=xLow @(12)=xHigh @(16)=yLow @(20)=yHigh

innerLoop:                             ;for (i = iMax; i > 0; i--) {
    ;mov.l    r10,        @-r15        ;i                PUSH!! 19
    
;r4 = zrHigh = xHigh
;r5 = zrLow     = xLow
;r6 = ziHigh = yHigh
;r7 = ziLow  = yLow

    mov        r4,            r8            ;keep zrHigh (sq64 may negate r4/r5)
    mov        r5,            r9            ;keep zrLow
    bsr        sq64                    ;zr2 = zr * zr
    nop
    mov        r0,            r13            ;zr2High
    mov        r1,            r14            ;zr2Low

    mov        r6,            r4            ;ziHigh
    mov        r7,            r5            ;ziLow
    bsr        sq64                    ;zi2 = zi * zi
    nop
    mov        r0,            r11            ;zi2High
    mov        r1,            r12            ;zi2Low
    
;if (zr2 + zi2 > 4)
;NOTE(review): no clrt before this addc pair; it relies on T being 0 on return from sq64.
    addc    r14,        r1            ;zi2Low += zr2Low
    addc    r13,        r0            ;zi2High += zr2High
    mov.l    #67108864,    r1            ;0x04000000    //±0000100.000000000000000000000000
    cmp/ge    r0,            r1            ;T = 0x04000000 >= zi2High (signed compare of the high words only)
    bf        exitInnerLoop            ;if(T == 0) branch exitInnerLoop    //leaves with T = 0

;zi *= zr;
    mov        r8,            r4            ;zrHigh
    mov        r9,            r5            ;zrLow
    bsr        mul64                    ;zi *= zr    //r6:r7 still hold zi
    nop

;zi += zi + y;
    clrt
    addc    r1,            r1            ;ziLow += ziLow
    addc    r0,            r0            ;ziHigh += ziHigh
    mov.l    @(16,r15),    r7            ;yLow
    mov.l    @(20,r15),    r6            ;yHigh
    clrt
    addc    r1,            r7            ;ziLow += yLow
    addc    r0,            r6            ;ziHigh += yHigh

;zr = zr2 - zi2 + x;
    clrt
    subc    r12,        r14            ;zr2Low -= zi2Low
    subc    r11,        r13            ;zr2High -= zi2High
    clrt
    mov.l    @(8,r15),    r5            ;xLow
    mov.l    @(12,r15),    r4            ;xHigh
    addc    r14,        r5            ;zrLow += xLow
    addc    r13,        r4            ;zrHigh += xHigh


    ;mov.l    @r15+,        r0            ;i                POP!! 19
    dt        r10                        ;T = --i == 0
    bf        innerLoop                ;if(T == 0) branch innerLoop    //falls through with T = 1
exitInnerLoop:
    ;mov.l    @r15+,        r0            ;i                POP!! 19

    ;tst        r10,        r10
;T is still the flag that ended the inner loop: 1 = iterations used up, 0 = escaped
    mov.l    @r15+,        r8            ;tempPixel        POP!! 18
    rotcl    r8                        ;tempPixel = (tempPixel << 1) + T

    mov.l    @r15+,        r1            ;col            POP!! 17
    mov        r1,            r0            ;col
    and     #7,         r0            ;col &= 7
    cmp/eq    #1,         r0            ;T = col == 1    //true on every 8th pixel since col counts down from 128
    bf        bypassVRAM                ;if(T == 0) branch bypassVRAM

;Stack here: @(0)=xLow @(4)=xHigh @(8)=yLow @(12)=yHigh @(16)=row @(20)=vram @(24)=lowIsz @(28)=highIsz
    mov        @(20,r15),    r0            ;vram
    mov.b    r8,         @r0            ;*vram = tempPixel (low byte = the last 8 pixels)
    add     #1,         r0            ;vram++
    mov        r0,            @(20,r15)    ;vram
bypassVRAM:

    mov        @(24,r15),    r3            ;lowIsz    //reloaded because sq64/mul64 use r2 and r3 as scratch
    mov        @(28,r15),    r2            ;highIsz

    mov.l    @r15+,        r5            ;xLow            POP!! 16
    mov.l    @r15+,        r4            ;xHigh            POP!! 15

    clrt
    addc    r3,            r5            ;xLow += lowIsz; T = Carry
    addc    r2,            r4            ;xHigh += highIsz + T
    
    mov.l    @r15+,        r7            ;yLow            POP!! 14
    mov.l    @r15+,        r6            ;yHigh            POP!! 13

    dt        r1                        ;T = --col == 0
    bf        col                        ;if(T == 0) branch col

    clrt
    addc    r3,            r7            ;yLow += lowIsz; T = Carry
    addc    r2,            r6            ;yHigh += highIsz + T

    mov.l    @r15+,        r0            ;row            POP!! 12
    dt        r0                        ;T = --row == 0
    bf        row                        ;if(T == 0) branch row
    


;Drop the three working words; the values are not needed any more
    mov.l    @r15+,        r0            ;vram            POP!! 11

    mov.l    @r15+,        r0            ;lowIsz            POP!! 10
    mov.l    @r15+,        r0            ;highIsz        POP!! 9

    lds.l    @r15+,        pr            ;pr                POP!! 8
    mov.l    @r15+,        r14            ;r14            POP!! 7
    mov.l    @r15+,        r13            ;r13            POP!! 6
    mov.l    @r15+,        r12            ;r12            POP!! 5
    mov.l    @r15+,        r11            ;r11            POP!! 4
    mov.l    @r15+,        r10            ;r10            POP!! 3
    mov.l    @r15+,        r9            ;r9                POP!! 2
    mov.l    @r15+,        r8            ;r8                POP!! 1
    ;ldc.l    @r15+,        gbr            ;gbr            POP!! 0
    rts
    nop


sq64 function in asm
sq64:    ;Square 64bit number        ;sq64(nHigh, nLow);
;Squares the 1:7:56 fixed point number nHigh:nLow and returns the result in the same format.
;In:  r4 = nHigh, r5 = nLow
;Out: r0 = outHigh, r1 = outLow
;Overwrites r2, r3, r4, r5 and T (r4:r5 are replaced by |n| when n is negative).
;
;With n = H*2^32 + L:  n*n = H*H*2^64 + 2*H*L*2^32 + L*L
;r0:r1:r2 collect bits 127..32 of that 128bit product (the low word of L*L is dropped),
;then the 64 result bits are taken 8 bits below the top of r0, which matches the
;56 fraction bits of the number format.
;r0 = outHigh
;r1 = outLow
;r2 = tempLow
;r3 = temp
;r4 = nHigh
;r5 = nLow

    cmp/pz    r4                        ;T = nHigh >= 0
    bt        sqPositiveIn            ;if(T == 1) branch sqPositiveIn
;T is 0 here, so the negc pair computes the 64bit two's complement with borrow
    negc    r5,            r5            ;nLow  = -nLow
    negc    r4,            r4            ;nHigh = -nHigh
sqPositiveIn:                        ;n < 0 ? -n : n

;r0:r1:r2 = 2 * (nLow * nHigh)
    dmulu.l r5,         r4            ;nLow * nHigh
    sts     macl,        r2
    shll    r2                        ;r2 <<= 1, T = bit shifted out
    sts     mach,        r1
    addc    r1,         r1            ;r1 = 2 * r1 + T, T = carry out
    movt    r0                        ;r0 = that carry
    
;r2 += high word of nLow * nLow
    clrt
    dmulu.l r5,         r5            ;nLow * nLow
    sts     mach,        r3
    addc    r3,            r2

;r0:r1 += nHigh * nHigh (plus the carry out of r2)
    dmulu.l r4,         r4            ;nHigh * nHigh
    sts     macl,        r3
    addc    r3,         r1
    sts     mach,        r3
    addc    r3,         r0
                                    ;   XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW    << 8
                                    ;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
    mov        r1,         r5
    shll8    r0                        ;000000xx    XXXXXX00
    shll8    r1                        ;000000yy    YYYYYY00

    mov        #-24,        r3            ;8-32    //negative count = logical shift right by 24
    shld    r3,         r5            ;000000YY    yyyyyy00
    shld    r3,         r2            ;000000ZZ    zzzzzz00
    
;The shifted parts do not overlap, so these additions only merge the bit fields
    clrt
    addc    r2,            r1            ;outLow  = YYYYYY00 + 000000ZZ
    addc    r5,            r0            ;outHigh = XXXXXX00 + 000000YY
    
    rts
    nop


mul64 function in asm
mul64:                             ;mul64(x1, x0, y1, y0)
                                ;       r4, r5, r6, r7
;Multiplies the two 1:7:56 fixed point numbers x1:x0 and y1:y0.
;In:  r4 = x1 (high), r5 = x0 (low), r6 = y1 (high), r7 = y0 (low)
;Out: r0 = product high word, r1 = product low word
;Both inputs are made positive first, the magnitudes are multiplied as unsigned numbers
;and the result is negated again when exactly one input was negative.
;r8-r11 are saved and restored (r9 is saved but not otherwise used here);
;r2 and r4-r7 are overwritten.
;Decimal point is 8bits right, from the left        1:7:56        Sign:Int:Frac

    mov.l    r11,        @-r15
    mov.l    r10,        @-r15
    mov.l    r9,         @-r15
    mov.l    r8,         @-r15
    mov.l    #0,            r0            ;r0 = sign flag (toggled once per negative input)
    mov.l    #0,         r11            ;r11 = constant 0

    cmp/ge    r11,        r4            ;T = x1 >= 0
    bt        mulPositiveX
;T is 0 here, so the negc pair is a 64bit negate
    negc    r5,            r5
    negc    r4,            r4
    xor        #1,            r0
mulPositiveX:

    cmp/ge    r11,        r6            ;T = y1 >= 0
    bt        mulPositiveY
    negc    r7,            r7
    negc    r6,            r6
    xor        #1,            r0
mulPositiveY:

    mov.l    #0,            r1
    mov        r0,            r10            ;r10 = sign flag (r0 is reused as the top result word)

;r0:r1:r2 collect bits 127..32 of the 128bit product; the low word of x0 * y0 is dropped
    dmulu.l r5,         r7            ;x0 * y0
    sts     mach,        r2

    dmulu.l r5,         r6            ;x0 * y1
    clrt
    sts     macl,        r8
    addc    r8,         r2
    sts     mach,        r8
    
    dmulu.l r4,         r7            ;x1 * y0
    addc    r8,         r1            ;adds mach of x0 * y1 plus the carry from r2
    movt    r0                        ;r0 = carry out of r1
    clrt
    sts     macl,        r8
    addc    r8,         r2
    sts     mach,        r8

    dmulu.l r4,         r6            ;x1 * y1
    addc    r8,         r1            ;adds mach of x1 * y0 plus the carry from r2
    addc    r11,        r0            ;r0 += carry (r11 is 0)
    clrt
    sts     macl,        r8
    addc    r8,         r1
    sts     mach,        r8
    addc    r8,         r0
                                    ;   XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
                                    ;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
    mov        r1,         r5
    shll8    r0                        ;    XXXXXX
    shll8    r1                        ;    YYYYYY

    mov        #-24,        r8            ;8-32        //Right
    shld    r8,         r5            ;YY
    shld    r8,         r2            ;ZZ
    
;The shifted parts do not overlap, so plain adds merge the bit fields
    add        r5,            r0
    add        r2,            r1
    
    cmp/eq    r11,        r10            ;T = sign flag == 0
    bt        mulPositiveN
;T is 0 here, so the negc pair is a 64bit negate of the result
    negc    r1,            r1            ;outLow
    negc    r0,            r0            ;outHigh
mulPositiveN:

    mov.l    @r15+,        r8
    mov.l    @r15+,        r9
    mov.l    @r15+,        r10
    mov.l    @r15+,        r11
    rts
    nop


hmm... the tabs aren't lining up correctly
(tabs are 4 spaces aligned)
RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 06/01/2020 10:43 | #


Not bad at all! Do you have a compiled binary to try that out?
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 06/01/2020 20:58 | # | Fichier joint


Heres a working prototype
You can't change anything, nor exit the program (need to restart the calc via the button on the back)
It isn't zooming in, but slowly increases iterations from 1 to infinity
Will release a fully working version later today

MANDEL.G1A
RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 06/01/2020 21:13 | #


That looks nice and really fast even for 20 or 30 iterations. Good job!
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 08/01/2020 01:12 | # | Fichier joint


I'm gonna be away for a while, so won't be able to do any coding
So here's an update on it
I've managed to decode how ML_display_vram works in asm and have added it to my code
Now you can see it display to screen as it generates
I don't redraw the entire screen every pixel, I only draw 8pixels to the screen every 8pixels
it was 14 instructions extra and in total, they only run 2816 extra instructions
Which is nothing compared to the millions of instructions being run to calculate all the pixels

It is super buggy right now tho, at certain zoom levels, moving the camera around does nothing
I'm not sure how to stop it generating while it's running via a keypress [EXIT]
Would you know how?
You can exit the program when it's finished via [MENU] (don't need to restart the calc :P)
I don't clear the screen every time the Mandel is redrawn, so you can see the old image get overwritten as the new one generates

MANDEL.G1A
RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
LephenixnoirHors ligneAdministrateurPoints: 16767 Défis: 140 Message

Citer : Posté le 12/01/2020 09:40 | #


Redcmd a écrit :
it was 14 instructions extra and in total, they only run 2816 extra instructions
Which is nothing compared to the millions of instructions being run to calculate all the pixels

This is correct, however even with the friendly SuperH platform here there is no equivalence between time and number of instructions. Remember that these instructions perform accesses to device memory and requires much longer than register arithmetic.

Hopefully for you this was measured and the total cost of updating the screen is usually 3.5ms or so. This is not too much. But it is still a lot more than the 13µs needed to clear the video RAM, even though the amount of updated memory is the same!

It is quite interactive already and I really like it. To stop generating when EXIT is pressed, you can check for IsKeyDown(KEY_CTRL_EXIT) at regular intervals, for instance whenever you finish a group of 4 rows. This test is not very costly, on SH4 calculators it is one device memory access followed by a binary mask.
RedcmdHors ligneMembrePoints: 209 Défis: 5 Message

Citer : Posté le 17/01/2020 07:39 | # | Fichier joint


Fixed a bunch of bugs and added the X and Y cords to the top left of the screen (Fixed point to string converter. Took me way too long to figure it out)
There still seems to be some bugs where it doesn't move at certain zoom levels, but shouldn't be too bad now
pressing [MENU] just takes you to the main menu, but now [EXIT] will quit the program and return to the main menu
But I'm having some problems with it crashing after pressing [EXIT]

Max zoom level has been increased from 2^48 to 2^60, but movement becomes limited after 2^54 due to the 64bit variables that store the position of the camera running out of space

Now I'm going to add an option to change the iterations manually (maybe even grayscale)
Stop it while it's still generating via [EXIT]
And clean up and optimize the code heavily

I found out today that addc and addv compute their T flag differently
I used addv because it doesn't have a + T allowing me to skip clrt which addc needs to be run before use
It was causing some weird speckle like patterns at higher zoom levels

Computing carries in C is a pain, so I made small functions in ASM to do it
Like sum64 and neg64

MANDEL.G1A

mandel.c
#include "fxlib.h"
#include "stdio.h"

#define TRUE 1
#define FALSE 0
#define abs(x) ((x) < 0 ? -(x) : (x))

#define ML_vram_adress (*(sc_cpv)sc0135)

typedef enum { ML_TRANSPARENT = -1, ML_WHITE, ML_BLACK, ML_XOR, ML_CHECKER } ML_Color;
typedef char* (*sc_cpv)(void);
const unsigned int sc0135[] = { 0xD201D002, 0x422B0009, 0x80010070, 0x0135 };

/*
 * Clears the complete video RAM buffer (128 x 64 pixels, 1 bit each = 1024 bytes).
 *
 * The buffer address is not guaranteed to be 4-byte aligned, so the work is split
 * into three parts: single bytes up to the next aligned address, whole 32-bit words,
 * and the remaining single bytes.
 *
 * The previous version computed the head length as "4 - vram & 3", which C parses as
 * "(4 - vram) & 3". For an aligned buffer that gave head = 0 and tail = 0 together
 * with a fixed count of 255 words, so the last 4 bytes were never cleared.
 */
void ML_clear_vram() {
    int i, head, words, tail, * pointer_long, vram;
    char* pointer_byte;

    vram = (int)ML_vram_adress();
    head = (4 - (vram & 3)) & 3;        /* bytes needed to reach a 4-byte boundary (0..3) */
    words = (1024 - head) >> 2;         /* whole 32-bit words that fit after the head */
    tail = (1024 - head) & 3;           /* bytes left over after the last whole word */

    pointer_byte = (char*)vram;
    for (i = 0; i < head; i++) pointer_byte[i] = 0;

    pointer_long = (int*)(vram + head);
    for (i = 0; i < words; i++) pointer_long[i] = 0;

    pointer_byte += head + (words << 2);
    for (i = 0; i < tail; i++) pointer_byte[i] = 0;
}

/*
 * Copies the video RAM buffer to the LCD controller, one 16-byte row at a time.
 *
 * For each of the 64 rows the register selector is set to 4 and "row | 192" is written,
 * then selector 4 again with the value 0, then selector 7 followed by the 16 data bytes
 * of that row.
 *
 * The two controller registers are memory-mapped, so they are accessed through
 * volatile pointers: every store has to reach the hardware in program order, and
 * without volatile a compiler is allowed to merge or drop the repeated writes.
 */
void ML_display_vram() {
    volatile char* LCD_register_selector = (volatile char*)0xB4000000;
    volatile char* LCD_data_register = (volatile char*)0xB4010000;
    char* vram;
    int row, column;

    vram = ML_vram_adress();
    for (row = 0; row < 64; row++) {
        *LCD_register_selector = 4;
        *LCD_data_register = row | 192;
        *LCD_register_selector = 4;
        *LCD_data_register = 0;
        *LCD_register_selector = 7;
        for (column = 0; column < 16; column++)
            *LCD_data_register = *vram++;
    }
}

/*
 * Draws the horizontal line from (x1, y) to (x2, y) into the video RAM buffer.
 *
 * Nothing is drawn when y is outside 0..63 or when both ends lie off the same side of
 * the screen; otherwise the ends are ordered and clamped to 0..127.
 * A row is 16 bytes, the most significant bit of a byte is its leftmost pixel.
 * The first and last byte of the line are only partly covered and are combined with
 * bit masks; bytes in between are written whole.
 * Colours other than black, white, xor and checker draw nothing.
 */
void ML_horizontal_line(int y, int x1, int x2, ML_Color color) {
    int i, first, last, lo, hi;
    char checker;
    char* row = ML_vram_adress();

    if (y & ~63) return;
    if (x1 < 0 && x2 < 0) return;
    if (x1 > 127 && x2 > 127) return;

    if (x1 > x2) {
        i = x1;
        x1 = x2;
        x2 = i;
    }
    if (x1 < 0) x1 = 0;
    if (x2 > 127) x2 = 127;

    row += y << 4;          /* start of the 16-byte row */
    first = x1 >> 3;        /* byte holding the left end */
    last = x2 >> 3;         /* byte holding the right end */
    lo = x1 & 7;            /* bit position of the left end inside its byte */
    hi = x2 & 7;            /* bit position of the right end inside its byte */

    if (color == ML_BLACK) {
        if (first == last) {
            row[first] |= (255 >> (lo + 7 - hi)) << (7 - hi);
        } else {
            row[first] |= 255 >> lo;
            row[last] |= 255 << (7 - hi);
            for (i = first + 1; i < last; i++)
                row[i] = 255;
        }
    } else if (color == ML_WHITE) {
        if (first == last) {
            row[first] &= (255 << (8 - lo)) | (255 >> (1 + hi));
        } else {
            row[first] &= 255 << (8 - lo);
            row[last] &= 255 >> (1 + hi);
            for (i = first + 1; i < last; i++)
                row[i] = 0;
        }
    } else if (color == ML_XOR) {
        if (first == last) {
            row[first] ^= (255 >> (lo + 7 - hi)) << (7 - hi);
        } else {
            row[first] ^= 255 >> lo;
            row[last] ^= 255 << (7 - hi);
            for (i = first + 1; i < last; i++)
                row[i] ^= 255;
        }
    } else if (color == ML_CHECKER) {
        /* alternating bit pattern, shifted by one pixel on odd rows */
        checker = (y & 1 ? 85 : 170);
        if (first == last) {
            row[first] &= (255 << (8 - lo)) | (255 >> (1 + hi));
            row[first] |= checker & ((255 >> (lo + 7 - hi)) << (7 - hi));
        } else {
            row[first] &= 255 << (8 - lo);
            row[last] &= 255 >> (1 + hi);
            row[first] |= checker & (255 >> lo);
            row[last] |= checker & (255 << (7 - hi));
            for (i = first + 1; i < last; i++)
                row[i] = checker;
        }
    }
}

/*
 * Draws the vertical line from (x, y1) to (x, y2) into the video RAM buffer.
 *
 * Nothing is drawn when x is outside 0..127 or when both ends lie off the same side of
 * the screen; otherwise the ends are ordered and clamped to 0..63.
 * The line touches one bit in one byte per row; rows are 16 bytes apart.
 * Colours other than black, white, xor and checker draw nothing.
 */
void ML_vertical_line(int x, int y1, int y2, ML_Color color) {
    int pos, stop;
    char checker, byte, * vram = ML_vram_adress();

    if (x & ~127) return;
    if (y1 < 0 && y2 < 0) return;
    if (y1 > 63 && y2 > 63) return;

    if (y1 > y2) {
        int swap = y1;
        y1 = y2;
        y2 = swap;
    }
    if (y1 < 0) y1 = 0;
    if (y2 > 63) y2 = 63;

    pos = (y1 << 4) + (x >> 3);     /* byte of the first row */
    stop = (y2 << 4) + (x >> 3);    /* byte of the last row */

    if (color == ML_BLACK) {
        byte = 128 >> (x & 7);
        while (pos <= stop) {
            vram[pos] |= byte;
            pos += 16;
        }
    } else if (color == ML_WHITE) {
        byte = ~(128 >> (x & 7));
        while (pos <= stop) {
            vram[pos] &= byte;
            pos += 16;
        }
    } else if (color == ML_XOR) {
        byte = 128 >> (x & 7);
        while (pos <= stop) {
            vram[pos] ^= byte;
            pos += 16;
        }
    } else if (color == ML_CHECKER) {
        byte = 128 >> (x & 7);
        /* non-zero: the first pixel is cleared; the state flips on every row */
        checker = (y1 & 1) ^ (x & 1);
        while (pos <= stop) {
            if (checker)
                vram[pos] &= ~byte;
            else
                vram[pos] |= byte;
            checker = !checker;
            pos += 16;
        }
    }
}

/*
 * Sets, clears or inverts the single pixel (x, y) in the video RAM buffer.
 *
 * Coordinates outside 0..127 / 0..63 are ignored.
 * ML_CHECKER clears the pixel when x and y have different parity and sets it otherwise.
 * Colours other than black, white, xor and checker draw nothing.
 */
void ML_pixel(int x, int y, ML_Color color) {
    char* cell = ML_vram_adress();

    if (x & ~127 || y & ~63) return;

    cell += (y << 4) + (x >> 3);    /* 16 bytes per row, 8 pixels per byte */

    if (color == ML_BLACK) {
        *cell |= 128 >> (x & 7);
    } else if (color == ML_WHITE) {
        *cell &= ~(128 >> (x & 7));
    } else if (color == ML_XOR) {
        *cell ^= 128 >> (x & 7);
    } else if (color == ML_CHECKER) {
        if ((y & 1) ^ (x & 1))
            *cell &= ~(128 >> (x & 7));
        else
            *cell |= 128 >> (x & 7);
    }
}

//Program state shared by reset() and AddIn_main(); reset() restores every value below to its initial value.
unsigned char HUD = TRUE;                //Heads Up Display: Coords, Zoom level & Max iteration: toggle with [F1]
char colour = ML_XOR;                    //Colour of camera rectangle (an ML_Color value): cycled with [F2]
unsigned int iMax = 32;                    //max iterations; recomputed from graphZoom after every key press
int screenZoom = 1;                        //zoom level on screen (camera rectangle)
unsigned int graphZoom = 1;                //zoom level for graph
int screenX = 0, screenY = 0;            //offset coords on screen from 0,0 for camera rectangle
//Camera position as two 64bit fixed point numbers (1:7:56), each split into a high and a low 32bit word.
//The X value is stored negated: it is passed through neg64 before it is shown in the HUD.
int graphHighX = 0;
int graphHighY = 0;
int graphLowX = 0;
int graphLowY = 0;

/*
 * Renders the Mandelbrot set for the given camera position and zoom level, then stores
 * the finished picture in saved-display slot 1.
 *
 * X0:X1 and Y0:Y1 are the (negated) camera centre as 64bit fixed point numbers
 * (1:7:56, high word first). drawMandel expects the corner where rendering starts,
 * so half of the visible width / height is added and the result is negated.
 *
 * One pixel is (0x00100000:00000000) >> zoom (the step drawMandel uses), therefore
 *   half the width  = 64 pixels = (0x04000000:00000000) >> zoom
 *   half the height = 32 pixels = (0x02000000:00000000) >> zoom
 * The single set bit stays in the high word while zoom < 27 (width) / zoom < 26 (height)
 * and moves into the low word after that. The previous code switched to the low word
 * at zoom 21 / 22 and shifted it by the full zoom value, which produced wrong offsets.
 *
 * The start corner is negated with neg64 (presumably an in-place 64bit two's complement
 * negate, as it is used that way in AddIn_main - confirm against mandelasm.src).
 * Negating the two words separately, as before, is off by one unit of the high word
 * whenever the low word is not zero.
 */
void drawMandelbrot(int X0, int X1, int Y0, int Y1, int zoom, int iMax) {
    register char* vram = ML_vram_adress();
    int offsetHigh, offsetLow;

    /* half the screen width */
    if (zoom < 27) {
        offsetHigh = 0x04000000 >> zoom;
        offsetLow = 0;
    } else {
        offsetHigh = 0;
        /* a shift count of 32 or more is undefined in C, the offset is 0 by then */
        offsetLow = zoom < 59 ? (int)(0x80000000u >> (zoom - 27)) : 0;
    }
    sum64(&X0, &X1, offsetHigh, offsetLow);

    /* half the screen height */
    if (zoom < 26) {
        offsetHigh = 0x02000000 >> zoom;
        offsetLow = 0;
    } else {
        offsetHigh = 0;
        offsetLow = zoom < 58 ? (int)(0x80000000u >> (zoom - 26)) : 0;
    }
    sum64(&Y0, &Y1, offsetHigh, offsetLow);

    neg64(&X0, &X1);
    neg64(&Y0, &Y1);

    drawMandel(zoom, vram, Y0, Y1, X0, X1, iMax);
    SaveDisp(1);
}

/*
 * Puts the program back into its start-up state: default camera position, zoom levels,
 * iteration limit, rectangle colour and HUD setting. Then clears the video RAM and
 * renders the default view.
 */
void reset() {
    /* camera position */
    graphHighX = graphLowX = 0;
    graphHighY = graphLowY = 0;
    screenX = screenY = 0;

    /* zoom and iteration limit */
    graphZoom = 1;
    screenZoom = 1;
    iMax = 32;

    /* display options */
    colour = ML_XOR;
    HUD = TRUE;

    ML_clear_vram();
    drawMandelbrot(graphHighX, graphLowX, graphHighY, graphLowY, graphZoom, iMax);
}

/*
 * Returns n / x^p, computed by repeated division (p > 0) or repeated multiplication
 * (p < 0), one step per unit of p. Returns n unchanged when p is 0.
 * Used instead of integer shifts for scale factors that do not fit into 32 bits.
 */
double divByPow(double n, double x, int p) {
    while (p < 0) {
        n *= x;
        p++;
    }
    while (p > 0) {
        n /= x;
        p--;
    }
    return n;
}

/*
 * Appends the decimal representation of the 64bit fixed point number high:low to the
 * zero-terminated text already in string: a sign ("+" or "-"), the integer part, a dot
 * and every fractional digit needed to represent the value exactly.
 *
 * string      zero-terminated buffer to append to. With the 56 fraction bits used by
 *             this program up to 56 fractional digits are produced, so the buffer
 *             needs room for its current content plus about 62 more characters.
 * fixedPoint  number of integer bits (including the sign bit) in the high word.
 *             NOTE(review): the limb arithmetic below hard-codes a 4 bit regrouping;
 *             only fixedPoint = 8 is used by the callers in this file.
 * high, low   the number, high word first.
 *
 * Returns the number of fractional digits written. (The function was declared to
 * return char but had no return statement; existing callers ignore the result.)
 *
 * Changes compared with the previous version:
 *  - the new text is written at the end of string instead of passing string to
 *    sprintf as both destination and "%s" argument, which is undefined behaviour;
 *  - the integer part is taken from the magnitude produced by abs64 rather than from
 *    abs(high), which is one too large for some negative values with a non-zero low
 *    word (assumes abs64 stores the 64bit absolute value in place, as the fraction
 *    code already requires);
 *  - unused local variables were removed.
 */
char sprintFrac(unsigned char* string, int fixedPoint, int high, int low) {
    unsigned int outHigh;
    unsigned int testHigh = high;
    unsigned int testLow = low;
    unsigned int limbMask;
    int limbBits;
    int i = 0;
    unsigned char fracHigh[256];
    unsigned char* end = string;

    abs64(&testHigh, &testLow);
    outHigh = testHigh >> (32 - fixedPoint);        /* integer part of the magnitude */

    /*
     * Regroup the fraction bits into two limbs of limbBits bits each
     * (28 + 28 for fixedPoint = 8), so that a limb times 10 still fits into 32 bits.
     */
    fixedPoint -= 4;
    limbBits = 32 - fixedPoint;
    limbMask = (1u << limbBits) - 1;

    testHigh <<= 4;
    testHigh += testLow >> limbBits;
    testLow &= limbMask;
    testHigh &= limbMask;

    /* Multiply the fraction by 10; the bits above the limb are the next decimal digit. */
    do {
        testHigh *= 10;
        testLow *= 10;
        testHigh += testLow >> limbBits;            /* carry from the low limb */
        fracHigh[i++] = '0' + (testHigh >> limbBits);
        testHigh &= limbMask;
        testLow &= limbMask;
    } while (testHigh || testLow);
    fracHigh[i] = '\0';

    /* Find the end of the existing text and append there. */
    while (*end)
        end++;
    sprintf((char*)end, "%s%u.%s", high < 0 ? "-" : "+", outHigh, (char*)fracHigh);

    return (char)i;
}

int AddIn_main(int isAppli, unsigned short OptionNum) {        //Main function
    unsigned int key;                        //pause until key press

    unsigned char string[32];                //Used in converting int to string
    unsigned int graphMoveHigh, graphMoveLow;//amount graphX & Y changes by when moving camera rectangle around
    unsigned int screenMove;                //amount screenX & Y changes by when moving camera rectangle around
    int screenX1, screenX2;                    //corner X cords for drawing rectangle to screen
    int screenY1, screenY2;                    //corner Y cords for drawing rectangle to screen

    //64bit Fixed Point number format - spilt between two 32bit variables
    //1:7:56
    //Sign:Int:Frac
    //±:0000000:00000000000000000000000000000000000000000000000000000000
    //±0000000.000000000000000000000000,00000000000000000000000000000000
    //high = ±0000000.000000000000000000000000
    //low  = 00000000000000000000000000000000

    reset();

    do {

        screenMove = graphZoom > 55 ? 16 >> (screenZoom - (graphZoom - 55)) : screenZoom > 4 ? 1 : 16 >> screenZoom;

        if ((screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom) < 24) {
            graphMoveHigh = 0x00800000 >> (screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom);
            graphMoveLow = 0x00000000;
        } else if ((screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom) < 56) {
            graphMoveHigh = 0x00000000;
            graphMoveLow = 0x80000000 >> (screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom) - 24;
        } else {
            graphMoveHigh = 0x00000000;
            graphMoveLow = 0x00000001;
        }

        if (HUD == -1) {
            string[0] = '\0';
            sprintFrac(&string, 8, graphMoveHigh, graphMoveLow);
            PrintMini(7, 12, string, 0);

            sprintf(&string, "%u", graphZoom);
            PrintMini(0, 22, string, 0);
            sprintf(&string, "%u", screenZoom);
            PrintMini(0, 28, string, 0);
            sprintf(&string, "%u", screenMove);
            PrintMini(0, 34, string, 0);
            sprintf(&string, "%X", graphMoveLow);
            PrintMini(0, 40, string, 0);
        }

        GetKey(&key);
        switch (key) {
            case KEY_CHAR_PLUS:
                if (graphZoom < 65) {
                    graphZoom++;
                    screenZoom++;
                }
                break;
            case KEY_CHAR_MINUS:
                if (graphZoom) {
                    graphZoom--;
                    screenZoom--;
                }
                break;
            case KEY_CTRL_UP:
                screenY -= screenMove;
                sum64(&graphHighY, &graphLowY, graphMoveHigh, graphMoveLow);
                break;
            case KEY_CTRL_DOWN:
                screenY += screenMove;
                sub64(&graphHighY, &graphLowY, graphMoveHigh, graphMoveLow);
                break;
            case KEY_CTRL_LEFT:
                screenX -= screenMove;
                sum64(&graphHighX, &graphLowX, graphMoveHigh, graphMoveLow);
                break;
            case KEY_CTRL_RIGHT:
                screenX += screenMove;
                sub64(&graphHighX, &graphLowX, graphMoveHigh, graphMoveLow);
                break;
            case KEY_CTRL_F1:
                HUD = !HUD;
                break;
            case KEY_CTRL_F2:
                if (colour > ML_TRANSPARENT)
                    colour--;
                else
                    colour = ML_CHECKER;
                break;
            case KEY_CTRL_F3:
                //Gray scale, by refreshing screen multiple times per sec at different max iterations (iMax)
                break;
            case KEY_CTRL_AC:
                reset();
                break;
            case KEY_CTRL_EXE:
                //SetTimer(1, 200, stop);
                drawMandelbrot(graphHighX, graphLowX, graphHighY, graphLowY, graphZoom, iMax);
                //KillTimer(1);
                screenX = 0;
                screenY = 0;
                screenZoom = 1;
                break;
        }

        iMax = 8 * (graphZoom + 3);

        if (key != KEY_CTRL_AC && key != KEY_CTRL_EXE) {
            RestoreDisp(1);
            if (screenZoom < 8) {
                screenX1 = 65 - divByPow(128, 2, screenZoom) + screenX;
                screenX2 = 62 + divByPow(128, 2, screenZoom) + screenX;
                screenY1 = 32 - (screenZoom > 6 ? 1 : divByPow(64, 2, screenZoom)) + screenY;
                screenY2 = 31 + (screenZoom > 6 ? 0 : divByPow(64, 2, screenZoom)) + screenY;
                ML_horizontal_line(screenY1, screenX1, screenX2, colour);
                ML_horizontal_line(screenY2, screenX1, screenX2, colour);
                ML_vertical_line(screenX1 - 1, screenY1, screenY2, colour);
                ML_vertical_line(screenX2 + 1, screenY1, screenY2, colour);
            } else
                ML_pixel(screenX + 64, screenY + 31, colour);

            if (HUD == TRUE) {

                neg64(&graphHighX, &graphLowX);
                sprintf(&string, "X:");
                sprintFrac(&string, 8, graphHighX, graphLowX);
                PrintMini(0, 0, string, 0);
                neg64(&graphHighX, &graphLowX);

                sprintf(&string, "Y:");
                sprintFrac(&string, 8, graphHighY, graphLowY);
                PrintMini(0, 6, string, 0);

                sprintf(&string, "MaxI:%u", iMax);
                PrintMini(0, 53, string, 0);
                if (graphZoom > 32)
                    sprintf(&string, "Zoom:2^%ux", graphZoom - 1);
                else if (graphZoom > 0)
                    sprintf(&string, "Zoom:%ux", 1 << graphZoom - 1);
                else
                    sprintf(&string, "Zoom:0.5x");
                PrintMini(0, 59, string, 0);
            }
        }
    } while (key != KEY_CTRL_EXIT);

    return 1;
}


//Add-in start-up boilerplate: each #pragma section pair places the enclosed item in the named linker section.
#pragma section _BR_Size
unsigned long BR_Size;
#pragma section
#pragma section _TOP
//Start-up function placed in section _TOP; forwards both arguments to the SDK macro INIT_ADDIN_APPLICATION and returns its result.
int InitializeSystem(int isAppli, unsigned short OptionNum) {
    return INIT_ADDIN_APPLICATION(isAppli, OptionNum);
}
#pragma section


mandelasm.src
;Symbols made visible to the C side of the add-in.
;Each name carries a leading underscore, matching the C identifier without it.
.EXPORT _drawMandel
.EXPORT _neg64
.EXPORT _abs64
.EXPORT _sum64
.EXPORT _sub64
;NOTE(review): no _shld64 label is defined between here and .END in this
;listing - confirm it exists elsewhere or drop this export.
.EXPORT _shld64

.ALIGN    4






_drawMandel:                        ;drawMandel(zoom, vram, yHighStart, yLowStart, xHighStart, xLowStart, iMax);
;Draws the Mandelbrot set as 64 rows of 128 one-bit pixels.
;Each finished group of 8 pixels is written both to the LCD data register
;and to the byte buffer that 'vram' points at (vram advances one byte per group).
;
;Arguments on entry:
;r4 = zoom
;r5 = vram
;r6 = yHighStart
;r7 = yLowStart
;stack = xHighStart, xLowStart, iMax (in that order, lowest address first)
;
;Register use inside the loops:
;r0 = temp0 / result high word from sq64 and mul64
;r1 = temp1 / result low word from sq64 and mul64 / column counter
;r2 = highIsz (per-pixel step, high word) / LCD data register address
;r3 = lowIsz  (per-pixel step, low word)
;r4 = xHigh, then zrHigh inside the inner loop
;r5 = xLow,  then zrLow  inside the inner loop
;r6 = yHigh, then ziHigh inside the inner loop
;r7 = yLow,  then ziLow  inside the inner loop
;r8 = tempPixel (8 pixel bits being collected); zrHigh copy inside the inner loop
;r9 = zrLow copy inside the inner loop
;r10 = remaining iterations
;r11 = zi2High
;r12 = zi2Low
;r13 = zr2High
;r14 = zr2Low
;
;Stack layout at label 'row' after the row push (byte offsets from r15):
;  0 = row, 4 = vram, 8 = lowIsz, 12 = highIsz, 16 = pr, 20..44 = r14..r8,
;  48 = xHighStart, 52 = xLowStart, 56 = iMax
;Inside the column loop six more words are pushed, so every offset above grows by 24.

;64bit Fixed Point number format - split between two 32bit variables
;1:7:56
;Sign:Int:Frac
;±:0000000:00000000000000000000000000000000000000000000000000000000
;±0000000.000000000000000000000000,00000000000000000000000000000000
;high =    ±0000000.000000000000000000000000
;low =    00000000000000000000000000000000

    ;stc.l    gbr,        @-r15        ;gbr            PUSH!! 0
    mov.l    r8,         @-r15        ;r8                PUSH!! 1
    mov.l    r9,         @-r15        ;r9                PUSH!! 2
    mov.l    r10,        @-r15        ;r10            PUSH!! 3
    mov.l    r11,        @-r15        ;r11            PUSH!! 4
    mov.l    r12,        @-r15        ;r12            PUSH!! 5
    mov.l    r13,        @-r15        ;r13            PUSH!! 6
    mov.l    r14,        @-r15        ;r14            PUSH!! 7
    sts.l    pr,            @-r15        ;pr                PUSH!! 8
    ;ldc        r15,        gbr            ;gbr = r15
;r2 = highIsz    //±0000000.000000000000000000000000
;r3 = lowIsz    //00000000000000000000000000000000
;r4 = zoom        //0 - 56
    
;highIsz = 0x00100000 >> zoom;        (0x00100000 is 1/16 in the high-word format)
    neg        r4,            r4            ;zoom = -zoom (negative shld count = logical right shift)
    mov.l    #1048576,    r2            ;highIsz = 0x00100000
    shld    r4,            r2            ;highIsz >>= zoom

;if (zoom < 21) lowIsz = 0 (the bit is shifted out to the left), highIsz is kept
;else           lowIsz = 0x80000000 >> (zoom - 21), highIsz = 0
    mov.l    #2147483648,r3            ;lowIsz = 0x80000000
    add        #21,        r4            ;r4 = 21 - zoom
    
    cmp/pl    r4                        ;T = (21 - zoom) > 0
    bt/s    highBits                ;if(T == 1) branch highBits        //delay slot below runs either way
    shld    r4,            r3            ;left shift when r4 > 0, right shift when r4 < 0
    
    mov        #0,            r2            ;highIsz = 0 (step now lives entirely in lowIsz)
highBits:

    mov.l    r2,            @-r15        ;highIsz        PUSH!! 9
    mov.l    r3,            @-r15        ;lowIsz            PUSH!! 10




;Main Loops
    mov.l    r5,            @-r15        ;vram            PUSH!! 11
    mov        #0,            r8            ;tempPixel = 0
    mov        #64,        r0            ;row = 64

row:                                ;for (row = 64; row > 0; row--) {
    mov.l    r0,            @-r15        ;row            PUSH!! 12
    mov        @(48,r15),    r4            ;xHigh = xHighStart (x restarts on every row)
    mov        @(52,r15),    r5            ;xLow  = xLowStart
    
;char* LCD_register_selector = (char*)0xB4000000
    mov.l    #3019898880,r1
;*LCD_register_selector = 4
    mov        #4,            r3
    mov.b    r3,            @r1
;*LCD_data_register = (row - 1) ^ 255        //low byte = 0xC0 | (64 - row)
    add        #-1,        r0
    xor        #255,        r0
;char* LCD_data_register = (char*)0xB4010000
    mov.l    #3019964416,r2
    mov.b    r0,            @r2
;*LCD_register_selector = 4
    mov.b    r3,            @r1
;*LCD_data_register = 0
    mov        #0,            r3
    mov.b    r3,            @r2
;*LCD_register_selector = 7        //pixel bytes are written to the data register after this
    mov        #7,            r3
    mov.b    r3,            @r1

    mov.l    #128,        r1            ;col = 128

col:                                ;for (col = 128; col > 0; col--) {
    mov        @(56,r15),    r10            ;i = iMax
    mov.l    r6,            @-r15        ;yHigh            PUSH!! 13
    mov.l    r7,            @-r15        ;yLow            PUSH!! 14
    mov.l    r4,            @-r15        ;xHigh            PUSH!! 15
    mov.l    r5,            @-r15        ;xLow            PUSH!! 16
    mov.l    r1,            @-r15        ;col            PUSH!! 17
    mov.l    r8,            @-r15        ;tempPixel        PUSH!! 18 (r8 is reused below)

innerLoop:                             ;for (i = iMax; i != 0; i--) {
    
;r4 = zrHigh (first pass: xHigh)
;r5 = zrLow  (first pass: xLow)
;r6 = ziHigh (first pass: yHigh)
;r7 = ziLow  (first pass: yLow)

    mov        r4,            r8            ;keep zrHigh: sq64 overwrites r4/r5
    mov        r5,            r9            ;keep zrLow
    bsr        sq64                    ;zr2 = zr * zr
    nop
    mov        r0,            r13            ;zr2High
    mov        r1,            r14            ;zr2Low

    mov        r6,            r4            ;ziHigh (r6/r7 stay intact for mul64)
    mov        r7,            r5            ;ziLow
    bsr        sq64                    ;zi2 = zi * zi
    nop
    mov        r0,            r11            ;zi2High
    mov        r1,            r12            ;zi2Low
    
;if (zr2 + zi2 > 4) -> pixel escaped
;No clrt here: sq64 ends by adding a value below 256 to a word whose low byte is 0,
;which cannot carry, so T is 0 at this point.
    addc    r14,        r1            ;r1 = zi2Low + zr2Low
    addc    r13,        r0            ;r0 = zi2High + zr2High + carry
    mov.l    #67108864,    r1            ;0x04000000    //±0000100.000000000000000000000000
    cmp/ge    r0,            r1            ;T = 0x04000000 >= sum (signed, high words only)
    bf        exitInnerLoop            ;if(T == 0) branch exitInnerLoop, leaving T = 0

;zi *= zr;
    mov        r8,            r4            ;zrHigh
    mov        r9,            r5            ;zrLow
    bsr        mul64                    ;r0:r1 = zr * zi
    nop

;zi = 2 * (zr * zi) + y;
    clrt
    addc    r1,            r1            ;productLow  += productLow
    addc    r0,            r0            ;productHigh += productHigh + carry
    mov.l    @(16,r15),    r7            ;yLow
    mov.l    @(20,r15),    r6            ;yHigh
    clrt
    addc    r1,            r7            ;ziLow  = yLow  + productLow
    addc    r0,            r6            ;ziHigh = yHigh + productHigh + carry

;zr = zr2 - zi2 + x;
    clrt
    subc    r12,        r14            ;zr2Low -= zi2Low
    subc    r11,        r13            ;zr2High -= zi2High + borrow
    mov.l    @(8,r15),    r5            ;xLow
    mov.l    @(12,r15),    r4            ;xHigh
    clrt
    addc    r14,        r5            ;zrLow  = xLow  + (zr2 - zi2)Low
    addc    r13,        r4            ;zrHigh = xHigh + (zr2 - zi2)High + carry

    dt        r10                        ;i--; T = (i == 0)
    bf        innerLoop                ;if(T == 0) branch innerLoop
exitInnerLoop:
;T = 1 here when every iteration ran without escaping, T = 0 when the point escaped.
;The pop below does not change T.

    mov.l    @r15+,        r8            ;tempPixel        POP!! 18
    rotcl    r8                        ;tempPixel = (tempPixel << 1) + T

    mov.l    @r15+,        r1            ;col            POP!! 17
    mov        r1,            r0            ;col
    and     #7,         r0            ;col &= 7
    cmp/eq    #1,         r0            ;T = (col & 7) == 1, i.e. 8 pixels have been collected
    bf        bypassVRAM                ;if(T == 0) branch bypassVRAM

    mov.l    #3019964416,r2            ;LCD_data_register = 0xB4010000
    mov.b    r8,            @r2            ;*LCD_data_register = tempPixel (low byte)
    mov        @(20,r15),    r0            ;vram
    mov.b    r8,         @r0            ;*vram = tempPixel (low byte)
    add     #1,         r0            ;vram++
    mov        r0,            @(20,r15)    ;store the advanced vram pointer back
bypassVRAM:

    mov        @(24,r15),    r3            ;lowIsz
    mov        @(28,r15),    r2            ;highIsz

    mov.l    @r15+,        r5            ;xLow            POP!! 16
    mov.l    @r15+,        r4            ;xHigh            POP!! 15

    clrt
    addc    r3,            r5            ;xLow += lowIsz; T = Carry
    addc    r2,            r4            ;xHigh += highIsz + T
    
    mov.l    @r15+,        r7            ;yLow            POP!! 14
    mov.l    @r15+,        r6            ;yHigh            POP!! 13

    dt        r1                        ;col--; T = (col == 0)
    bf        col                        ;if(T == 0) branch col
    
    clrt
    addc    r3,            r7            ;yLow += lowIsz; T = Carry
    addc    r2,            r6            ;yHigh += highIsz + T

    mov.l    @r15+,        r0            ;row            POP!! 12
    dt        r0                        ;row--; T = (row == 0)
    bf        row                        ;if(T == 0) branch row

    mov.l    @r15+,        r0            ;vram            POP!! 11 (value discarded)

    mov.l    @r15+,        r0            ;lowIsz            POP!! 10 (value discarded)
    mov.l    @r15+,        r0            ;highIsz        POP!! 9 (value discarded)

    lds.l    @r15+,        pr            ;pr                POP!! 8
    mov.l    @r15+,        r14            ;r14            POP!! 7
    mov.l    @r15+,        r13            ;r13            POP!! 6
    mov.l    @r15+,        r12            ;r12            POP!! 5
    mov.l    @r15+,        r11            ;r11            POP!! 4
    mov.l    @r15+,        r10            ;r10            POP!! 3
    mov.l    @r15+,        r9            ;r9                POP!! 2
    mov.l    @r15+,        r8            ;r8                POP!! 1
    rts
    nop











sq64:    ;Square 64bit number        ;sq64(nHigh, nLow);
;Squares a 1:7:56 fixed point number and returns the result in the same format:
;the 128bit product of |n| with itself, with its top three 32bit words shifted
;left by 8 and the upper 64 bits kept (lower bits are truncated, not rounded).
;In:  r4 = nHigh, r5 = nLow
;Out: r0 = outHigh, r1 = outLow
;Overwrites r2, r3, r4, r5, MACH, MACL and T. T is 0 on return (see the last addc pair).
;r0 = outHigh
;r1 = outLow
;r2 = tempLower (third product word, only its top byte survives)
;r3 = temp
;r4 = nHigh
;r5 = nLow

    cmp/pz    r4                        ;T = nHigh >= 0
    bt        sqPositiveIn            ;if(T == 1) branch sqPositiveIn
    negc    r5,            r5            ;nLow  = -nLow (T is 0 here, negc leaves the borrow in T)
    negc    r4,            r4            ;nHigh = -nHigh - borrow
sqPositiveIn:                        ;n < 0 ? -n : n

;Cross term 2 * nLow * nHigh, doubled by shifting through T
    dmulu.l r5,         r4            ;nLow * nHigh
    sts     macl,        r2            ;tempLower
    shll    r2                        ;tempLower <<= 1; T = bit shifted out
    sts     mach,        r1
    addc    r1,         r1            ;r1 = 2 * r1 + T; T = bit shifted out
    movt    r0                        ;outHigh = that carry
    
;Only the high word of nLow * nLow reaches the kept words
    dmulu.l r5,         r5            ;nLow * nLow
    clrt
    sts     mach,        r3
    addc    r3,            r2            ;tempLower += high word; T = Carry

    dmulu.l r4,         r4            ;nHigh * nHigh
    sts     macl,        r3
    addc    r3,         r1            ;r1 += low word + T
    sts     mach,        r3
    addc    r3,         r0            ;r0 += high word + T
;r0:r1:r2 now hold product words X:Y:Z; shift the triple left by 8 and keep the top two words
                                    ;   XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW    << 8
                                    ;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
    mov        r1,         r5
    shll8    r0                        ;000000xx    XXXXXX00
    shll8    r1                        ;000000yy    YYYYYY00

    mov        #-24,        r3            ;8-32 (negative count = logical right shift by 24)
    shld    r3,         r5            ;000000YY    yyyyyy00
    shld    r3,         r2            ;000000ZZ    zzzzzz00
    
;Each sum puts a value below 256 into a low byte that is 0, so neither add can carry
    clrt
    addc    r2,            r1            ;outLow  = YYYYYY00 + 000000ZZ
    addc    r5,            r0            ;outHigh = XXXXXX00 + 000000YY
    
    rts
    nop
    






    


mul64:                             ;mul64(x1, x0, y1, y0)
                                ;       r4, r5, r6, r7
;Multiplies two signed 1:7:56 fixed point numbers and returns the product in the same format.
;Both operands are made positive first, the magnitudes are multiplied as unsigned
;64bit values, the top three words of the 128bit product are shifted left by 8,
;and the sign is re-applied at the end.
;In:  r4 = x1 (high), r5 = x0 (low), r6 = y1 (high), r7 = y0 (low)
;Out: r0 = product high, r1 = product low
;Saves and restores r8, r9, r10. Overwrites r2, r5, MACH, MACL and T;
;r4:r5 and r6:r7 are negated in place when their operand is negative.
;Decimal point is 8bits right, from the left        1:7:56        Sign:Int:Frac

    mov.l    r10,        @-r15
    mov.l    r9,         @-r15
    mov.l    r8,         @-r15
    mov.l    #0,            r0            ;sign flag = 0
    mov.l    #0,         r9            ;constant 0 for the carry-only add and the sign test

    cmp/pz    r4                        ;T = x1 >= 0
    bt        mulPositiveX
    negc    r5,            r5            ;x = -x (T is 0 here)
    negc    r4,            r4
    xor        #1,            r0            ;toggle sign flag
mulPositiveX:

    cmp/pz    r6                        ;T = y1 >= 0
    bt        mulPositiveY
    negc    r7,            r7            ;y = -y (T is 0 here)
    negc    r6,            r6
    xor        #1,            r0            ;toggle sign flag
mulPositiveY:

    mov.l    #0,            r1
    mov        r0,            r10            ;sign flag (r0 is reused for the result)

;Accumulate into r0:r1:r2 = product words X:Y:Z; the lowest word W is never kept
    dmulu.l r5,         r7            ;x0 * y0
    sts     mach,        r2            ;Z = high word

    dmulu.l r5,         r6            ;x0 * y1
    clrt
    sts     macl,        r8
    addc    r8,         r2            ;Z += low word; T = Carry
    sts     mach,        r8
    
    dmulu.l r4,         r7            ;x1 * y0
    addc    r8,         r1            ;Y = high word of x0*y1 + T
    movt    r0                        ;X = carry out
    clrt
    sts     macl,        r8
    addc    r8,         r2            ;Z += low word; T = Carry
    sts     mach,        r8

    dmulu.l r4,         r6            ;x1 * y1
    addc    r8,         r1            ;Y += high word of x1*y0 + T
    addc    r9,            r0            ;X += T
    clrt
    sts     macl,        r8
    addc    r8,         r1            ;Y += low word; T = Carry
    sts     mach,        r8
    addc    r8,         r0            ;X += high word + T
                                    ;   XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
                                    ;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
    mov        r1,         r5
    shll8    r0                        ;    XXXXXX
    shll8    r1                        ;    YYYYYY

    mov        #-24,        r8            ;8-32        //Right
    shld    r8,         r5            ;YY
    shld    r8,         r2            ;ZZ
    
;Plain add is enough: each sum fills a low byte that is 0, so no carry can occur
    add        r5,            r0
    add        r2,            r1
    
    cmp/eq    r9,            r10            ;T = sign flag == 0
    bt        mulPositiveN
    negc    r1,            r1            ;outLow  (T is 0 here)
    negc    r0,            r0            ;outHigh
mulPositiveN:

    mov.l    @r15+,        r8
    mov.l    @r15+,        r9
    mov.l    @r15+,        r10
    rts
    nop
    



    

_neg64:                                ;neg64(&high, &low)
;Negates, in place, the 64bit value stored as two 32bit words.
;In:  r4 = address of the high word, r5 = address of the low word
;Out: both words rewritten with the negated value
;Overwrites r0 and T.
    clrt                            ;no borrow going into the low word
    mov.l    @r5,        r0
    negc    r0,            r0            ;low = 0 - low; T = borrow
    mov.l    r0,            @r5

    mov.l    @r4,        r0
    negc    r0,            r0            ;high = 0 - high - borrow
    rts
    mov.l    r0,            @r4            ;delay slot: store high word (same pattern as _abs64/_sum64/_sub64)



_abs64:                                ;abs64(&high, &low)
;Replaces, in place, the 64bit value stored as two 32bit words with its absolute value.
;In:  r4 = address of the high word, r5 = address of the low word
;Overwrites r0 and T, plus r1 when the value is negative.
    mov.l    @r4,        r0
    cmp/pz    r0                        ;T = high >= 0
    bt/s    positiveABS                ;already positive: skip the negation
    clrt                            ;delay slot, runs on both paths: no borrow going into negc

    mov.l    @r5,        r1
    negc    r1,            r1            ;low = 0 - low; T = borrow
    negc    r0,            r0            ;high = 0 - high - borrow
    mov.l    r1,            @r5

positiveABS:
    rts
    mov.l    r0,            @r4            ;delay slot: store high word (unchanged when it was positive)



_sum64:                                ;sum64(&highN, &lowN, highM, lowM)
;Adds the 64bit value M to the 64bit value N stored in memory: N += M.
;In:  r4 = address of N's high word, r5 = address of N's low word,
;     r6 = M's high word, r7 = M's low word
;Overwrites r0 and T.
    clrt                            ;no carry going into the low word
    mov.l    @r5,        r0
    addc    r7,            r0            ;lowN += lowM; T = Carry
    mov.l    r0,            @r5
    
    mov.l    @r4,        r0
    addc    r6,            r0            ;highN += highM + T
    rts
    mov.l    r0,            @r4            ;delay slot: store high word



_sub64:                                ;sub64(&highN, &lowN, highM, lowM)
;Subtracts the 64bit value M from the 64bit value N stored in memory: N -= M.
;In:  r4 = address of N's high word, r5 = address of N's low word,
;     r6 = M's high word, r7 = M's low word
;Overwrites r0 and T.
    clrt                            ;no borrow going into the low word
    mov.l    @r5,        r0
    subc    r7,            r0            ;lowN -= lowM; T = Borrow
    mov.l    r0,            @r5
    
    mov.l    @r4,        r0
    subc    r6,            r0            ;highN -= highM + T
    rts
    mov.l    r0,            @r4            ;delay slot: store high word


.ALIGN    4

.END

RedCMD#4299 - Discord
Mandelbrot SNKEmini Minesweeper Sudoku
Pages : Précédente1, 2, 3, 4, 5

Planète Casio v42 © créé par Neuronix et Muelsaco 2004 - 2020 | Il y a 49 connectés | Nous contacter | Qui sommes-nous ? | Licences et remerciements

Planète Casio est un site communautaire non affilié à Casio. Toute reproduction de Planète Casio, même partielle, est interdite.
Les programmes et autres publications présentes sur Planète Casio restent la propriété de leurs auteurs et peuvent être soumis à des licences ou copyrights.
CASIO est une marque déposée par CASIO Computer Co., Ltd