Problem 3

Posted on Dec 2, 2022

Problem

Find the largest prime factor of a given number, in this case: 600851475143. A factor of n is a number such that n divided by the factor gives a remainder of zero. A prime factor is a factor which is also a prime number, i.e. it has no factors other than 1 and the number itself.

General solution

A brute-force solution to this problem would involve dividing n by every number between 2 and n - 1. If the remainder is zero, then the number is a factor, f, of n and must be checked to see if it is prime. A simple way to do this is to divide f by every number between 2 and f - 1. If none of these divisions result in a remainder of zero then f is prime.

Unfortunately the simple solution results in:

  • n - 2 division operations to find the factors.
  • f - 2 division operations for each factor to check for primeness.

Divisions are an expensive arithmetic operation for CPUs, so we want to minimise them. The highest level at which we can reduce operations is finding the factors of n, because each potential factor p involves 1 division operation (to check if it is a factor) and then up to p - 2 divisions to check for primeness.

One property of factors that is relevant here is that if f is a factor of n, there must be another factor, f’, such that f * f’ = n. This means that if we test all the factors up to n / 2, we will have found all the other factors. This reduces the number of factor checks by 50%.

However, we can do better than checking factors up to n / 2. Since f will always be less than or equal to f’ (assuming we start checking factors from 2 and work upwards), the worst case scenario will be when n is a square number, where f = f’. Therefore, we only need to check factors up to the square root of n, as any factors above sqrt(n) will be paired with a factor below sqrt(n). This will always be less than n - 2 if n > 4.

Once we have our list of factors, we need to find the largest one which is prime. Again we can use the square root shortcut to reduce the number of divisions.

We can also reduce the number of divisions by only attempting to factorise f by prime numbers. This works because every composite (i.e. not prime) number can be expressed as the product of prime numbers, and therefore has at least two prime factors. If a number has no prime factors (other than itself) then it is prime.

Finally, as we are looking for the highest prime factor, we can process the factors in descending order. The first prime factor we come to must be the highest prime factor. This avoids having to check all the prime factors and keep a record of the highest, at the cost of having to sort the factors first (but sorting is generally a cheap operation).

In order to factorise by prime numbers, we need to generate a list of primes. One way to do this is by implementing the Sieve of Eratosthenes, which works as follows:

  1. Create a list of all numbers up to the target.
  2. Starting at 2 (the first prime), mark every second number as not prime, as it must be divisible by 2.
  3. Find the next prime number after 2 and repeat the process, i.e. mark every third number as not prime.
  4. Continue moving through the grid until you reach the last number.

The major advantage of this algorithm is that it avoids expensive divisions. The only calculation required is to increment the current position by the current prime number, which is a relatively cheap operation on pretty much every processor in existence. The downside is that an array of integers needs to be stored in memory. If we assume an integer is 8 bytes (64 bits), then the total memory is 8 multiplied by the highest number. In this case, the numbers are small enough to be workable, however we would not be able to use this algorithm if the target was a much larger number.

Solution: Go

package main

import (
	"fmt"
	"math"
	"os"
	"sort"
)

func getPrimes(maxNumber int) []int {
	const UNKNOWN = 0
	const NOT_PRIME = 1
	const IS_PRIME = 2
	const FIRST_PRIME = 2

	primes := []int{}

	// Use make for the sieve because we know the size and everything should
	// be populated with zero
	sieve := make([]int, maxNumber+1)
	sieve[FIRST_PRIME] = IS_PRIME

	for sieveIndex, currentPrime := FIRST_PRIME, FIRST_PRIME; currentPrime != 0 && sieveIndex <= maxNumber; sieveIndex++ {
		// Assume this prime will be the last
		nextPrime := 0
		step := currentPrime

		// Mark all multiples of current prime as non-prime
		for j := currentPrime + step; j <= maxNumber; j += step {
			sieve[j] = NOT_PRIME
		}

		// Find the next unknown element after the current prime
		// This will be the next prime
		for k := currentPrime + 1; nextPrime == 0 && k <= maxNumber; k++ {
			if sieve[k] == UNKNOWN {
				sieve[k] = IS_PRIME
				nextPrime = k
			}
		}

		// Set current prime to be the next prime
		// If there is no next prime (nextPrime == 0), this will end
		// the for loop as currentPrime != 0 will be false
		currentPrime = nextPrime
	}

	// We now have a sieve with all the numbers flagged as prime/not prime
	// Extract just the primes and return as a slice
	for s := FIRST_PRIME; s <= maxNumber; s++ {
		if sieve[s] == IS_PRIME {
			primes = append(primes, s)
		}
	}

	return primes
}

func main() {
	const TARGET = 600851475143

	highestPossibleFactor := int(math.Floor(math.Sqrt(TARGET)))

	// Build list of factors from 2 to the highest possible factor
	factors := []int{}

	for candidateFactor := 2; candidateFactor < highestPossibleFactor; candidateFactor++ {
		remainder := TARGET % candidateFactor
		if remainder == 0 {
			// candidateFactor is a factor, so find its other side
			otherFactor := TARGET / candidateFactor
			factors = append(factors, candidateFactor, otherFactor)
		}
	}

	// Bail out if we do not have any factors, i.e. TARGET is itself prime
	if len(factors) == 0 {
		fmt.Println("Target number is prime and therefore has no factors")
		os.Exit(1)
	}

	// Sort factors so we can process them from highest to lowest
	sort.Slice(factors, func(a, b int) bool {
		return factors[a] > factors[b]
	})

	// To find whether a factor is prime, we divide it by all the prime numbers
	// less than its square root
	highestFactor := factors[0]
	maxPrimeCandidate := int(math.Floor(math.Sqrt(float64(highestFactor))))
	primes := getPrimes(maxPrimeCandidate)

	for f := range factors {
		// Assume a factor is prime until we factorise it
		isPrime := true

		for p := 0; isPrime && p < len(primes) && primes[p] < factors[f]; p++ {
			remainder := factors[f] % primes[p]
			if remainder == 0 {
				isPrime = false
			}
		}

		if isPrime {
			// This must be the highest prime factor as we process the factors
			// in descending order, therefore we can print it and exit
			fmt.Println(factors[f])
			os.Exit(0)
		}
	}

	// If we get this far, no prime factor was found - i.e. all the factors
	// of the target are composite numbers
	fmt.Println("Target number has no prime factors")
	os.Exit(1)
}

Solution: PHP

<?php

define('SIEVE_UNKNOWN', -1);
define('SIEVE_NOT_PRIME', 0);
define('SIEVE_PRIME', 1);
define('FIRST_PRIME', 2);

function prime_numbers(int $max_prime_candidate) : array
{
    $sieve = array_fill(0, $max_prime_candidate + 1, SIEVE_UNKNOWN);
    $primes = [];

    $sieve[FIRST_PRIME] = SIEVE_PRIME;
    $current_prime = FIRST_PRIME;
    $primes_found = 1;

    for ($i = FIRST_PRIME; $current_prime != 0 && $i <= $max_prime_candidate; $i++)
    {
        $next_prime = 0; // assume this prime will be the last
        $step = $current_prime;

        // Mark all multiples of current prime as non-prime
        for ($j = $current_prime + $step; $j <= $max_prime_candidate; $j += $step)
        {
            $sieve[$j] = SIEVE_NOT_PRIME;
        }

        // Find the next unknown element after the current prime,
        // this is the next prime
        for ($k = $current_prime + 1; $next_prime === 0 && $k <= $max_prime_candidate; $k++)
        {
            if ($sieve[$k] === SIEVE_UNKNOWN)
            {
                $sieve[$k] = SIEVE_PRIME;
                $next_prime = $k;
                $primes_found++;
            }
        }

        // Set current prime to be the next prime.
        // If there is no next prime ($next_prime === 0), this will end the search.
        $current_prime = $next_prime;
    }

    for ($s = FIRST_PRIME; $s <= $max_prime_candidate; $s++)
    {
        if ($sieve[$s] === SIEVE_PRIME)
        {
            $primes[] = $s;
        }
    }

    return $primes;
}
<?php

declare(strict_types=1);
error_reporting(E_ALL);

require_once __DIR__ . '/../primes.php';

define('FACTORISE_TARGET', 600851475143);

// Highest possible factor at the start is the square root of the target
$highest_possible_factor = intval(floor(sqrt(FACTORISE_TARGET)));

// Build list of factors from 2 to the highest possible factor
$factors = [];

for ($candidate_factor = 2; $candidate_factor < $highest_possible_factor; $candidate_factor++)
{
    if (FACTORISE_TARGET % $candidate_factor === 0)
    {
        $other_factor = FACTORISE_TARGET / $candidate_factor;
        $factors[] = $candidate_factor;
        $factors[] = $other_factor;

        // Because we check factors from the lowest value, the other factor is
        // potentially the new highest possible factor
        if ($highest_possible_factor > $other_factor)
        {
            $highest_possible_factor = $other_factor;
        }
    }
}

// If we have found no factors, the target is prime and therefore has
// no highest prime factor
if (count($factors) === 0)
{
    die("No prime factors found\n");
}

// Sort factors
sort($factors, SORT_NUMERIC);

// Which of the factors are prime? First build an array of primes up to the
// square root of the highest factor.
// The alternative is to build an array of primes up to the highest factor, as
// that would allow us to do a lookup instead of multiple divisions, however
// that would also increase the memory requirements for the array by an order
// of highest_factor (which could be >1m).
$highest_factor = $factors[count($factors) - 1];
$max_prime_candidate = intval(floor(sqrt($highest_factor)));
$primes = prime_numbers($max_prime_candidate);

$highest_prime_factor = 0;

$factors_count = count($factors);
$primes_count = count($primes);

// Check each factor for primeness by dividing it by our primes. All composite
// numbers have at least one prime factor.
for ($i = 0; $i < $factors_count; $i++)
{
    // Assume a target is prime until we successfully factorise it
    $target_factor = $factors[$i];
    $is_prime = true;

    for ($j = 0; $is_prime && $j < $primes_count && $primes[$j] < $target_factor; $j++)
    {
        if ($target_factor % $primes[$j] === 0)
        {
            $is_prime = false;
        }
    }

    if ($is_prime)
    {
        $highest_prime_factor = $target_factor;
    }
}

print("$highest_prime_factor\n");

Solution: C

C does not have growable arrays (slices in Go) so we have to use a linked list instead. This means the code is more verbose and relies on the Glib library.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <inttypes.h>

#include <math.h>

#include <glib.h>

GSList* prime_numbers(uint32_t max_prime_candidate)
{
  const int8_t SIEVE_UNKNOWN = -1;
  const int8_t SIEVE_NOT_PRIME = 0;
  const int8_t SIEVE_PRIME = 1;

  const uint8_t FIRST_PRIME = 2;

  uint32_t current_prime = 0;
  uint32_t primes_found = 0;

  int8_t* sieve = (int8_t*) calloc(max_prime_candidate + 1, sizeof(int8_t));

  // Set the status of all numbers to unknown
  for (uint32_t i = 0; i <= max_prime_candidate; i++)
  {
    sieve[i] = SIEVE_UNKNOWN;
  }

  sieve[FIRST_PRIME] = SIEVE_PRIME;
  current_prime = FIRST_PRIME;
  primes_found = 1;

  for (uint32_t i = FIRST_PRIME; current_prime != 0 && i <= max_prime_candidate; i++)
  {
    uint32_t next_prime = 0; // assume this prime will be the last
    uint32_t step = current_prime;

    // Mark all multiples of current prime as non-prime
    for (uint32_t j = current_prime + step; j <= max_prime_candidate; j += step)
    {
      sieve[j] = SIEVE_NOT_PRIME;
    }

    // Find the next unknown element after the current prime - this is the next prime
    for (uint32_t k = current_prime + 1; next_prime == 0 && k <= max_prime_candidate; k++)
    {
      if (sieve[k] == SIEVE_UNKNOWN)
      {
        sieve[k] = SIEVE_PRIME;
        next_prime = k;
        primes_found++;
      }
    }

    // Set current prime to be the next prime. If there is no next prime, this
    // will end the search
    current_prime = next_prime;
  }

  // Create a linked list to return to the caller
  GSList* primes = NULL;

  for (uint32_t i = FIRST_PRIME; i <= max_prime_candidate; i++)
  {
    if (sieve[i] == SIEVE_PRIME)
    {
      primes = g_slist_append(primes, GUINT_TO_POINTER(i));
    }
  }

  free(sieve);

  return primes;
}

gint compare_int(gconstpointer a, gconstpointer b)
{
  uint32_t a_comp = GPOINTER_TO_UINT(a);
  uint32_t b_comp = GPOINTER_TO_UINT(b);

  if (a_comp < b_comp)
  {
    return -1;
  }
  else if (a_comp == b_comp)
  {
    return 0;
  }
  else
  {
    return 1;
  }
}

int main(void)
{
  const uint64_t FACTORISE_TARGET = 600851475143;

  // Highest possible factor at the start is the square root of the target
  uint32_t highest_possible_factor = floor(sqrt(FACTORISE_TARGET));

  // Build list of factors from 2 to the highest possible factor
  GSList* factors = NULL;

  for (uint32_t candidate_factor = 2; candidate_factor < highest_possible_factor; candidate_factor++)
  {
    printf("Factorising %lu with %u\n", FACTORISE_TARGET, candidate_factor);

    if (FACTORISE_TARGET % candidate_factor == 0)
    {
      uint32_t other_factor = FACTORISE_TARGET / candidate_factor;

      factors = g_slist_append(factors, GUINT_TO_POINTER(candidate_factor));
      factors = g_slist_append(factors, GUINT_TO_POINTER(other_factor));

      // Because we check candidate factors from the lowest value, the other
      // factor is potentially the highest possible factor
      if (highest_possible_factor > other_factor) {
        highest_possible_factor = other_factor;
      }
    }
  }

  // If we have found no factors, the target is prime and therefore has no
  // highest prime factor
  if (factors == NULL)
  {
    fprintf(stderr, "No prime factors found\n");
    g_slist_free(factors);
    return EXIT_FAILURE;
  }

  // Sort factors
  factors = g_slist_sort(factors, &compare_int);

  // Print list of factors
  for (GSList* current_factor = factors; current_factor != NULL; current_factor = current_factor->next)
  {
    printf("Factor: %u\n", GPOINTER_TO_UINT(current_factor->data));
  }

  // Which of the factors are prime? First build an array of primes up to the
  // square root of the highest factor.
  // The alternative is to build an array of primes up to the highest factor, as
  // that would allow us to do a lookup instead of multiple divisions, however
  // that would also increase the memory requirements for the array by an order
  // of highest_factor (which could be >1m).
  uint32_t highest_factor = GPOINTER_TO_UINT(g_slist_last(factors)->data);
  uint32_t max_prime_candidate = floor(sqrt(highest_factor));
  GSList* primes = prime_numbers(max_prime_candidate);

  for (GSList* current_prime = primes; current_prime != NULL; current_prime = current_prime->next)
  {
    printf("Prime: %u\n", GPOINTER_TO_UINT(current_prime->data));
  }

  uint32_t highest_prime_factor = 0;

  // Check each factor for primeness by dividing it by our primes. All composite
  // numbers have at least one prime factor.
  for (GSList* current_factor = factors; current_factor != NULL; current_factor = current_factor->next)
  {
    // Assume a target is prime unless we successively factorise it
    uint32_t target_factor = GPOINTER_TO_UINT(current_factor->data);
    bool is_prime = true;

    printf("Checking %u for primeness\n", target_factor);

    for (GSList* current_prime = primes; is_prime && current_prime != NULL && GPOINTER_TO_UINT(current_prime->data) < target_factor; current_prime = current_prime->next)
    {
      if (target_factor % GPOINTER_TO_UINT(current_prime->data) == 0)
      {
        is_prime = false;
        printf("%u is not prime, is divisible by: %u\n", target_factor, GPOINTER_TO_UINT(current_prime->data));
      }
    }

    if (is_prime)
    {
      printf("%u is prime\n", target_factor);
      highest_prime_factor = target_factor;
    }
  }

  // We do not have to check for highest prime factor being 0 because all
  // composite numbers have at least one prime factor
  printf("Highest prime factor of %lu is %u\n", FACTORISE_TARGET, highest_prime_factor);

  g_slist_free(primes);
  g_slist_free(factors);

  return EXIT_SUCCESS;
}