The dataMakerpy program will create a bunch of binary record

The dataMaker.py program will create a bunch of binary records. It uses the words in wordlist2.txt (which is a list of words 776521 long) to create a binary file relation.bin. Each record consists of 2 long integers followed by 2 strings of length 10. Therefore, each record is 36 bytes long. Use the dataMaker.py program to generate your own data file (its approximately 1 GB).

dataMaker.py

import random
import struct

oFile = open( \'relation.bin\', \'wb\')

random.seed(1)

#40 for a gig file
for k in range( 40 ):
wFile = open( \'wordlist2.txt\', \'r\')
for i,w in enumerate( wFile ):
#if i > 10:
# break
  
newW = \'\'
newW2 = \'\'
for j in range( len( w) ):
if w[j].isalpha():
newW = newW + w[j]
if len(newW) >= 10:
break
for j in range( 10, len( w) ):
if w[j].isalpha():
newW2 = newW2 + w[j]
if len(newW2) >= 10:
break
  
# pad the words
for j in range( len(newW), 10 ):
newW = newW + \' \'

for j in range( len(newW2), 10 ):
newW2 = newW2 + \' \'
  
n1 = random.randint( 0, 100000)
n2 = random.randint( 0, 100000)
  
newW = [ord(c) for c in newW]
newW2 = [ord(c) for c in newW2]
  
a = \'\'
a = a+ struct.pack( \'>q\', n1 )
a = a+ struct.pack( \'>q\', n2 )
for c in newW:
a = a + struct.pack( \'b\', int(c))
  
  
for c in newW2:
a = a + struct.pack( \'b\', int(c))
  
oFile.write( a )
wFile.close()

oFile.close()

exit()

Write an external merge sort algorithm in Python to sort the data in ascending order according to 1st string. You may use any reasonable definition of \"less than\" for strings. You must sort all of the data in 100MB of memory! The data in the file will not be exactly aligned on word boundarys; you may use an extra 100 MB of memory (roughly) into which you unpack the binary data read from the data file. For example, if you define a struct for each record, then make an array of structs, then fill that array with data read from disk, you will be able to your language’s built in sort routine. You must then write a program that asks the user to a query string, and returns any matches.

Solution

Dear

As I can help you that I can share the algo n merge sort program that help you in your program . But if you follow correct step i hope your problem will solve

We take two rray for this example

Program are as follow

void merge(int arr[], int l, int m, int r)

{

    int i, j, k;

    int n1 = m - l + 1;

    int n2 = r - m;

/* create a temp arrays */

    int L[n1], R[n2];

/* Copy data to temp arrays L[] and R[] */

    for (i = 0; i < n1; i++)

        L[i] = arr[l + i];

    for (j = 0; j < n2; j++)

        R[j] = arr[m + 1+ j];

/* Merge the temp arrays back into arr[l..r]*/

    i = 0, j = 0, k = l;

    while (i < n1 && j < n2)

    {

        if (L[i] <= R[j])

        {

            arr[k] = L[i];

            i++;

        }

        else

        {

            arr[k] = R[j];

            j++;

        }

        k++;

    }

/* Copy the remaining elements of L[], if there

       are any */

    while (i < n1)

    {

        arr[k] = L[i];

        i++;

        k++;

    }

/* Copy the remaining elements of R[], if there

       are any */

    while (j < n2)

    {

        arr[k] = R[j];

        j++;

        k++;

    }

}

/* l is for left index and r is right index of the

   sub-array of arr to be sorted */

void mergeSort(int arr[], int l, int r)

{

    if (l < r)

    {

int m = l+(r-l)/2;

mergeSort(arr, l, m);

        mergeSort(arr, m+1, r);

merge(arr, l, m, r);

    }

}

/* Function to print an array */

void printArray(int A[], int size)

{

    int i;

    for (i=0; i < size; i++)

        printf(\"%d \", A[i]);

    printf(\"\ \");

}

/* Driver program to test above functions */

int main()

{

    int arr[] = {12, 11, 13, 5, 6, 7};

    int arr_size = sizeof(arr)/sizeof(arr[0]);

    printf(\"Given array is \ \");

    printArray(arr, arr_size);

    mergeSort(arr, 0, arr_size - 1);

    printf(\"\ Sorted array is \ \");

    printArray(arr, arr_size);

    return 0;

}

The dataMaker.py program will create a bunch of binary records. It uses the words in wordlist2.txt (which is a list of words 776521 long) to create a binary fil
The dataMaker.py program will create a bunch of binary records. It uses the words in wordlist2.txt (which is a list of words 776521 long) to create a binary fil
The dataMaker.py program will create a bunch of binary records. It uses the words in wordlist2.txt (which is a list of words 776521 long) to create a binary fil
The dataMaker.py program will create a bunch of binary records. It uses the words in wordlist2.txt (which is a list of words 776521 long) to create a binary fil

Get Help Now

Submit a Take Down Notice

Tutor
Tutor: Dr Jack
Most rated tutor on our site