using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.IO.Compression;
// Cut-off for product rows: the parsing methods only treat an element as a product row
// while its top value is below this field (`y < stopLine`). It starts at a large sentinel
// value; the statement that lowers it is not visible in this excerpt — presumably it is
// assigned when the configured stop-line phrase is matched. TODO confirm.
// NOTE(review): mutable static state shared by every parse call.
private static int stopLine = 3000000;
/// <summary>
/// Demo entry point: parses an embedded sample of positioned &lt;text&gt; elements into an
/// invoice, serializes it to indented JSON, round-trips that JSON through
/// <c>Compress</c>/<c>Decompress</c>, and prints the result and the size figures.
/// </summary>
public static void Main()
{
    // Sample input: one <text> element per text fragment. The left/top attributes are the
    // coordinates that the Configuration ranges below are matched against.
    string input = @" <text left=""2332"" top=""709"" width=""206"" height=""52"" font-face=""with arabic_00001.ttf"" font-size=""9"">Receipt #</text>
<text left=""2652"" top=""710"" width=""168"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">0000001</text>
<text left=""2438"" top=""806"" width=""100"" height=""52"" font-face=""with arabic_00001.ttf"" font-size=""9"">Date</text>
<text left=""2593"" top=""807"" width=""228"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">07/26/2023</text>
<text left=""480"" top=""259"" width=""333"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">Ahmed Mohamed</text>
<text left=""480"" top=""298"" width=""436"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">30 Imbaba Amr STreet</text>
<text left=""480"" top=""667"" width=""177"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">Sayed Ali</text>
<text left=""480"" top=""705"" width=""175"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">50 Algiza</text>
<text left=""2366"" top=""248"" width=""203"" height=""118"" font-face=""with arabic_00003.ttf"" font-size=""19.5"" color=""#ff2b2b2b"">R E C</text>
<text left=""2583"" top=""248"" width=""93"" height=""118"" font-face=""with arabic_00003.ttf"" font-size=""19.5"" color=""#ff2b2b2b"">E I</text>
<text left=""2690"" top=""248"" width=""120"" height=""118"" font-face=""with arabic_00003.ttf"" font-size=""19.5"" color=""#ff2b2b2b"">P T</text>
<text left=""509"" top=""989"" width=""84"" height=""59"" font-face=""with arabic_00003.ttf"" font-size=""10"">Item</text>
<text left=""761"" top=""989"" width=""226"" height=""59"" font-face=""with arabic_00003.ttf"" font-size=""10"">Description</text>
<text left=""2088"" top=""989"" width=""192"" height=""59"" font-face=""with arabic_00003.ttf"" font-size=""10"">Unit Price</text>
<text left=""2377"" top=""989"" width=""167"" height=""59"" font-face=""with arabic_00003.ttf"" font-size=""10"">Quantity</text>
<text left=""2640"" top=""989"" width=""156"" height=""59"" font-face=""with arabic_00003.ttf"" font-size=""10"">Amount</text>
<text left=""509"" top=""1077"" width=""134"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Product</text>
<text left=""761"" top=""1077"" width=""201"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Item No 01</text>
<text left=""2155"" top=""1077"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">120.00</text>
<text left=""2463"" top=""1077"" width=""79"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">2.00</text>
<text left=""2671"" top=""1077"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">240.00</text>
<text left=""509"" top=""1165"" width=""134"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Product</text>
<text left=""761"" top=""1165"" width=""201"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Item No 02</text>
<text left=""2177"" top=""1165"" width=""101"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">30.00</text>
<text left=""2463"" top=""1165"" width=""79"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">1.00</text>
<text left=""2693"" top=""1165"" width=""101"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">30.00</text>
<text left=""509"" top=""1252"" width=""134"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Product</text>
<text left=""761"" top=""1252"" width=""201"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Item No 03</text>
<text left=""2155"" top=""1252"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">302.00</text>
<text left=""2463"" top=""1252"" width=""79"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">2.00</text>
<text left=""2671"" top=""1252"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">604.00</text>
<text left=""509"" top=""1340"" width=""147"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Expense</text>
<text left=""761"" top=""1340"" width=""63"" height=""48"" font-face=""with arabic_00000.ttf"" font-size=""8.5"">هلابه</text>
<text left=""814"" top=""1340"" width=""12"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5""> </text>
<text left=""826"" top=""1340"" width=""31"" height=""48"" font-face=""with arabic_00000.ttf"" font-size=""8.5"">وبا</text>
<text left=""858"" top=""1340"" width=""12"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">
<text left=""870"" top=""1340"" width=""53"" height=""48"" font-face=""with arabic_00000.ttf"" font-size=""8.5"">ديسلا</text>
<text left=""924"" top=""1340"" width=""12"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">
<text left=""936"" top=""1340"" width=""55"" height=""48"" font-face=""with arabic_00000.ttf"" font-size=""8.5"">ىلع</text>
<text left=""982"" top=""1340"" width=""12"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">
<text left=""995"" top=""1340"" width=""78"" height=""48"" font-face=""with arabic_00000.ttf"" font-size=""8.5"">باسح</text>
<text left=""2155"" top=""1340"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">200.00</text>
<text left=""2463"" top=""1340"" width=""79"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">1.00</text>
<text left=""2671"" top=""1340"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">200.00</text>
<text left=""509"" top=""1428"" width=""131"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Service</text>
<text left=""761"" top=""1428"" width=""151"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">Nawloon</text>
<text left=""2155"" top=""1428"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">120.00</text>
<text left=""2463"" top=""1428"" width=""79"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">1.00</text>
<text left=""2671"" top=""1428"" width=""123"" height=""48"" font-face=""with arabic_00002.ttf"" font-size=""8.5"">120.00</text>
<text left=""483"" top=""1768"" width=""13"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">
<text left=""1942"" top=""1641"" width=""180"" height=""52"" font-face=""with arabic_00001.ttf"" font-size=""9"">Subtotal</text>
<text left=""2622"" top=""1642"" width=""173"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">1,194.00</text>
<text left=""1942"" top=""1729"" width=""108"" height=""52"" font-face=""with arabic_00001.ttf"" font-size=""9"">Total</text>
<text left=""2622"" top=""1730"" width=""173"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">1,194.00</text>
<text left=""1942"" top=""1802"" width=""273"" height=""52"" font-face=""with arabic_00001.ttf"" font-size=""9"">Amount Paid</text>
<text left=""2622"" top=""1803"" width=""173"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">1,194.00</text>
<text left=""1942"" top=""1896"" width=""264"" height=""52"" font-face=""with arabic_00001.ttf"" font-size=""9"">Balance Due</text>
<text left=""2709"" top=""1897"" width=""86"" height=""52"" font-face=""with arabic_00002.ttf"" font-size=""9"">0.00</text>";

    // Constructor arguments, in order: first product row Y, then the X ranges of the
    // name, unit-price, quantity and total-price columns, then the stop-line phrase.
    Configuration config = new Configuration(1077, 761, 2000, 2155, 2400, 2463, 2600, 2671, 2700, "Subtotal");

    // Buyer name row. This was previously never assigned, so it stayed at 0 and no element
    // could match the buyer-name check. 667 is the row directly above the address row (705)
    // in the same X range of the sample ("Sayed Ali").
    config.BuyerNameY = 667;
    config.BuyerNameStartX = 400;
    config.BuyerNameEndX = 900;
    config.BuyerAddressY = 705;
    config.BuyerAddressStartX = 400;
    config.BuyerAddressEndX = 900;
    config.ProductStartPhrase = "Item";
    config.InvoiceNumberY = 710;
    config.InvoiceStartX = 2600;
    config.InvoiceEndX = 2800;

    Invoice inv = ProcessPrintXMLInv(input, config);
    string json = JsonConvert.SerializeObject(inv, Formatting.Indented);

    byte[] compressed = Compress(json);
    string decompressed = Decompress(compressed);
    Console.WriteLine(decompressed);

    // Measure the original in UTF-8 bytes (the StreamWriter default used by Compress) rather
    // than in UTF-16 chars, so both figures are in bytes even when the JSON holds Arabic text.
    int originalSize = System.Text.Encoding.UTF8.GetByteCount(json);
    int compressedSize = compressed.Length;
    double compressionRate = (1 - (double)compressedSize / originalSize) * 100;
    Console.WriteLine("Original Size: {0} bytes",originalSize);
    Console.WriteLine("Compressed Size: {0} bytes",compressedSize);
    Console.WriteLine("Compression Rate: {0}",compressionRate);
}
/// <summary>
/// GZip-compresses <paramref name="text"/> and returns the compressed bytes.
/// The text is encoded with the <see cref="StreamWriter"/> default encoding.
/// </summary>
/// <param name="text">Text to compress; a null value produces an empty gzip payload.</param>
/// <returns>The complete gzip stream as a byte array.</returns>
private static byte[] Compress(string text)
{
    using (MemoryStream memoryStream = new MemoryStream())
    {
        // The writer and the gzip stream must be disposed before the buffer is read:
        // disposing flushes the pending compressed data and the gzip trailer.
        using (GZipStream gzipStream = new GZipStream(memoryStream, CompressionMode.Compress))
        using (StreamWriter writer = new StreamWriter(gzipStream))
        {
            writer.Write(text);
        }

        // MemoryStream.ToArray remains usable after the stream has been closed.
        return memoryStream.ToArray();
    }
}
/// <summary>
/// Decompresses a gzip byte array and returns its content as text, decoded with the
/// <see cref="StreamReader"/> default encoding detection.
/// </summary>
/// <param name="compressedData">A complete gzip stream.</param>
/// <returns>The decompressed text.</returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown by the <see cref="MemoryStream"/> constructor when <paramref name="compressedData"/> is null.
/// </exception>
private static string Decompress(byte[] compressedData)
{
    using (MemoryStream memoryStream = new MemoryStream(compressedData))
    using (GZipStream gzipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
    using (StreamReader reader = new StreamReader(gzipStream))
    {
        return reader.ReadToEnd();
    }
}
// Scans `input` for <text ...> elements with the regex below and collects product-row
// values into a set of Record objects, looking rows up by each element's top value.
// NOTE(review): the braces, the declarations of `r`/`rr`, the bodies of several `if`s and
// the return statement are not visible in this excerpt, so the comments below describe
// only the statements that are shown.
// NOTE(review): the visible statements duplicate the product-row part of ProcessPrintXMLInv.
private static HashSet<Record> ProcessPrintXML(string input, Configuration config)
HashSet<Record> lines = new HashSet<Record>();
// One or more characters in U+0600..U+06FF; used to decide whether a name chunk is Arabic.
string arabicPattern = @"[\u0600-\u06FF]+";
// Commas and newlines are removed from numeric text before Decimal.Parse.
string cleanNumbersPattern = @"[,\n]";
// Runs of whitespace inside name chunks are replaced by a single space.
string newLinePattern = @"[\s]+";
// Groups 1-4 capture the left, top, width and height attribute digits. Group 5 captures the
// text after the tag's closing '>' for as long as it stays within the listed character class.
string pattern = @"<text left=""([\d]+)"" top=""([\d]+)"" width=""([\d]+)"" height=""([\d]+)""[\w|\W]*?>([a-zA-Z\s0-9 #.ا-ي,\/]*)";
// NOTE(review): Multiline only changes how ^ and $ match, and the pattern uses neither.
RegexOptions options = RegexOptions.Multiline;
foreach (Match m in Regex.Matches(input, pattern, options))
// x is the element's left value, y its top value.
Int32 x = Int32.Parse(m.Groups[1].Value);
Int32 y = Int32.Parse(m.Groups[2].Value);
// Equivalent to y >= config.ProductStartY: only elements from the first product row on.
if (y == config.ProductStartY || y > config.ProductStartY)
// Stop-phrase check. Its body is not visible — presumably it stores y in stopLine. TODO confirm.
if (m.Groups[5].Value.Equals(config.StopLinePhrase))
// Equivalent to y < stopLine: elements at or past the stop line are not treated as rows.
if (y != stopLine && y < stopLine)
// Probe record built from the element's top value, used to look up an existing row in the set.
r = new Record(Int32.Parse(m.Groups[2].Value));
// NOTE(review): what happens when no row is found (f == false) is not visible here.
bool f = lines.TryGetValue(r, out rr);
// The element's left value decides which field of the row receives the text.
// NOTE(review): Decimal.Parse is called without an explicit culture, so the result for
// text such as "120.00" depends on the current culture.
if (x >= config.ProductUnitPriceStartX && x <= config.ProductUnitPriceEndX)
rr.UnitPrice = Decimal.Parse(Regex.Replace(m.Groups[5].Value, cleanNumbersPattern, ""));
if (x >= config.ProductTotalPriceStartX && x <= config.ProductTotalPriceEndX)
rr.TotalPrice = Decimal.Parse(Regex.Replace(m.Groups[5].Value, cleanNumbersPattern, ""));
if (x >= config.ProductUnitQuantStartX && x <= config.ProductUnitQuantEndX)
rr.Quantity = Decimal.Parse(Regex.Replace(m.Groups[5].Value, cleanNumbersPattern, ""));
// Name column: each matching element contributes one chunk to the row's product name.
if (x >= config.ProductNameStartX && x <= config.ProductNameEndX)
// Text without Arabic characters is appended as-is (after whitespace collapsing).
if (!Regex.IsMatch(m.Groups[5].Value, arabicPattern))
rr.AddProductChunck(Regex.Replace(m.Groups[5].Value, newLinePattern, " "));
// Presumably the else branch (the `else` itself is not visible): Arabic text is passed
// through ReverseString before being appended.
rr.AddProductChunck(Regex.Replace(ReverseString(m.Groups[5].Value), newLinePattern, " "));
// Parses `input` into an Invoice: buyer name, buyer address and invoice number are taken
// from elements at configured coordinates, and product rows are collected as Record objects
// and then mapped to Line objects.
// NOTE(review): the braces, the declarations of `r`/`rr`, the bodies of several `if`s and
// the return statement are not visible in this excerpt, so the comments below describe
// only the statements that are shown.
private static Invoice ProcessPrintXMLInv(string input, Configuration config)
// Mapper used for the Record -> Line conversion at the end of this method.
// NOTE(review): the mapper configuration is rebuilt on every call.
var mapperConfig = new MapperConfiguration(cfg =>
cfg.CreateMap<Record, Line>();
var mapper = mapperConfig.CreateMapper();
Invoice inv = new Invoice();
HashSet<Record> lines = new HashSet<Record>();
// One or more characters in U+0600..U+06FF; used to decide whether a name chunk is Arabic.
string arabicPattern = @"[\u0600-\u06FF]+";
// Commas and newlines are removed from numeric text before Decimal.Parse.
string cleanNumbersPattern = @"[,\n]";
// Runs of whitespace inside name chunks are replaced by a single space.
string newLinePattern = @"[\s]+";
// Groups 1-4 capture the left, top, width and height attribute digits. Group 5 captures the
// text after the tag's closing '>' for as long as it stays within the listed character class.
string pattern = @"<text left=""([\d]+)"" top=""([\d]+)"" width=""([\d]+)"" height=""([\d]+)""[\w|\W]*?>([a-zA-Z\s0-9 #.ا-ي,\/]*)";
// NOTE(review): Multiline only changes how ^ and $ match, and the pattern uses neither.
RegexOptions options = RegexOptions.Multiline;
inv.BuyerInfo = new BuyerInfo();
foreach (Match m in Regex.Matches(input, pattern, options))
// x is the element's left value, y its top value.
Int32 x = Int32.Parse(m.Groups[1].Value);
Int32 y = Int32.Parse(m.Groups[2].Value);
// Header fields: an element is used when its top equals the configured Y exactly and its
// left lies inside the configured X range.
if (y == config.BuyerNameY && x >= config.BuyerNameStartX && x <= config.BuyerNameEndX)
inv.BuyerInfo.Name = m.Groups[5].Value;
if (y == config.BuyerAddressY && x >= config.BuyerAddressStartX && x <= config.BuyerAddressEndX)
inv.BuyerInfo.Address = m.Groups[5].Value;
if (y == config.InvoiceNumberY && x >= config.InvoiceStartX && x <= config.InvoiceEndX)
inv.InvoiceNumber = m.Groups[5].Value;
// Equivalent to y >= config.ProductStartY: only elements from the first product row on.
if (y == config.ProductStartY || y > config.ProductStartY)
// Stop-phrase check. Its body is not visible — presumably it stores y in stopLine. TODO confirm.
if (m.Groups[5].Value.Equals(config.StopLinePhrase))
// Equivalent to y < stopLine: elements at or past the stop line are not treated as rows.
if (y != stopLine && y < stopLine)
// Probe record built from the element's top value, used to look up an existing row in the set.
r = new Record(Int32.Parse(m.Groups[2].Value));
// NOTE(review): what happens when no row is found (f == false) is not visible here.
bool f = lines.TryGetValue(r, out rr);
// The element's left value decides which field of the row receives the text.
// NOTE(review): Decimal.Parse is called without an explicit culture, so the result for
// text such as "120.00" depends on the current culture.
if (x >= config.ProductUnitPriceStartX && x <= config.ProductUnitPriceEndX)
rr.UnitPrice = Decimal.Parse(Regex.Replace(m.Groups[5].Value, cleanNumbersPattern, ""));
if (x >= config.ProductTotalPriceStartX && x <= config.ProductTotalPriceEndX)
rr.TotalPrice = Decimal.Parse(Regex.Replace(m.Groups[5].Value, cleanNumbersPattern, ""));
if (x >= config.ProductUnitQuantStartX && x <= config.ProductUnitQuantEndX)
rr.Quantity = Decimal.Parse(Regex.Replace(m.Groups[5].Value, cleanNumbersPattern, ""));
// Name column: each matching element contributes one chunk to the row's product name.
if (x >= config.ProductNameStartX && x <= config.ProductNameEndX)
// Text without Arabic characters is appended as-is (after whitespace collapsing).
if (!Regex.IsMatch(m.Groups[5].Value, arabicPattern))
rr.AddProductChunck(Regex.Replace(m.Groups[5].Value, newLinePattern, " "));
// Presumably the else branch (the `else` itself is not visible): Arabic text is passed
// through ReverseString before being appended.
rr.AddProductChunck(Regex.Replace(ReverseString(m.Groups[5].Value), newLinePattern, " "));
// Convert every collected Record to a Line and keep only lines whose Quantity is above zero.
inv.Lines=lines.Select(p => mapper.Map<Line>(p)).Where(line => line.Quantity > 0).ToList();
/// <summary>
/// Returns <paramref name="input"/> with its UTF-16 characters in reverse order.
/// </summary>
/// <remarks>
/// The reversal works on individual <c>char</c> values, so surrogate pairs and combining
/// marks are not kept together with their base character.
/// </remarks>
/// <param name="input">Text to reverse.</param>
/// <returns>The reversed text; a null or empty input is returned unchanged.</returns>
private static string ReverseString(string input)
{
    // Nothing to reverse; also avoids a NullReferenceException on null input.
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    char[] array = input.ToCharArray();
    System.Array.Reverse(array);
    return new string(array);
}
/// <summary>
/// Coordinates and phrases that tell the parsing methods where each invoice field sits
/// in the positioned text. Y values are compared with an element's <c>top</c> attribute,
/// X values with its <c>left</c> attribute.
/// </summary>
sealed class Configuration
{
    /// <summary>Exact top value of the buyer-name element.</summary>
    public int BuyerNameY { get; set; }

    /// <summary>Inclusive lower bound of the buyer-name element's left value.</summary>
    public int BuyerNameStartX { get; set; }

    /// <summary>Inclusive upper bound of the buyer-name element's left value.</summary>
    public int BuyerNameEndX { get; set; }

    /// <summary>Exact top value of the buyer-address element.</summary>
    public int BuyerAddressY { get; set; }

    /// <summary>Inclusive lower bound of the buyer-address element's left value.</summary>
    public int BuyerAddressStartX { get; set; }

    /// <summary>Inclusive upper bound of the buyer-address element's left value.</summary>
    public int BuyerAddressEndX { get; set; }

    /// <summary>Exact top value of the invoice-number element.</summary>
    public int InvoiceNumberY { get; set; }

    /// <summary>Inclusive lower bound of the invoice-number element's left value.</summary>
    public int InvoiceStartX { get; set; }

    /// <summary>Inclusive upper bound of the invoice-number element's left value.</summary>
    public int InvoiceEndX { get; set; }

    /// <summary>Top value of the first product row; elements above it are not product rows.</summary>
    public int ProductStartY { get; set; }

    /// <summary>Not read by any parsing code visible in this file excerpt.</summary>
    public string ProductStartPhrase { get; set; }

    /// <summary>Not read by any parsing code visible in this file excerpt.</summary>
    public int LineHeight { get; set; }

    /// <summary>Inclusive lower bound of the product-name column's left value.</summary>
    public int ProductNameStartX { get; set; }

    /// <summary>Inclusive upper bound of the product-name column's left value.</summary>
    public int ProductNameEndX { get; set; }

    /// <summary>Inclusive lower bound of the unit-price column's left value.</summary>
    public int ProductUnitPriceStartX { get; set; }

    /// <summary>Inclusive upper bound of the unit-price column's left value.</summary>
    public int ProductUnitPriceEndX { get; set; }

    /// <summary>Inclusive lower bound of the quantity column's left value.</summary>
    public int ProductUnitQuantStartX { get; set; }

    /// <summary>Inclusive upper bound of the quantity column's left value.</summary>
    public int ProductUnitQuantEndX { get; set; }

    /// <summary>Inclusive lower bound of the total-price column's left value.</summary>
    public int ProductTotalPriceStartX { get; set; }

    /// <summary>Inclusive upper bound of the total-price column's left value.</summary>
    public int ProductTotalPriceEndX { get; set; }

    /// <summary>Text that is compared with each element's captured text to detect the stop line.</summary>
    public string StopLinePhrase { get; set; }

    /// <summary>
    /// Creates a configuration with the product-table settings. The buyer and invoice-number
    /// settings are left at their defaults and are assigned through the property setters.
    /// </summary>
    public Configuration(int productStartY, int productNameStartX, int productNameEndX, int productUnitPriceStartX, int productUnitPriceEndX, int productUnitQuantStartX, int productUnitQuantEndX, int productTotalPriceStartX, int productTotalPriceEndX, string stopLinePhrase)
    {
        ProductStartY = productStartY;
        ProductNameStartX = productNameStartX;
        ProductNameEndX = productNameEndX;
        ProductUnitPriceStartX = productUnitPriceStartX;
        ProductUnitPriceEndX = productUnitPriceEndX;
        ProductUnitQuantStartX = productUnitQuantStartX;
        ProductUnitQuantEndX = productUnitQuantEndX;
        ProductTotalPriceStartX = productTotalPriceStartX;
        ProductTotalPriceEndX = productTotalPriceEndX;
        StopLinePhrase = stopLinePhrase;
    }
}
// Identification, name and address of one party, all stored as plain strings.
// NOTE(review): the enclosing type declaration is not visible in this excerpt — presumably
// this is the body of SellerInfo or BuyerInfo (both are used as property types by the
// invoice members further down). TODO confirm.
public string Identification { get; set; }
public string Name { get; set; }
public string Address { get; set; }
// Identification, name and address of one party, all stored as plain strings.
// NOTE(review): the enclosing type declaration is not visible in this excerpt — presumably
// this is the body of BuyerInfo or SellerInfo; Name and Address of a BuyerInfo are assigned
// by ProcessPrintXMLInv. TODO confirm.
public string Identification { get; set; }
public string Name { get; set; }
public string Address { get; set; }
// Invoice data members (the enclosing type declaration is not visible in this excerpt;
// ProcessPrintXMLInv assigns InvoiceNumber, BuyerInfo and Lines on an Invoice instance).
// Invoice number text as captured from the input.
public string InvoiceNumber { get; set; }
// NOTE(review): not assigned by any code visible in this excerpt.
public string DueDate { get; set; }
// NOTE(review): not assigned by any code visible in this excerpt.
public SellerInfo SellerInfo { get; set; }
// Buyer name and address.
public BuyerInfo BuyerInfo { get; set; }
// Product lines of the invoice.
public List<Line> Lines { get; set; }
/// <summary>
/// Returns a one-line summary with the invoice number and the buyer's name and address.
/// </summary>
/// <returns>The formatted summary; missing values are rendered as empty text.</returns>
public override string ToString()
{
    // BuyerInfo is a settable property and may be null; avoid a NullReferenceException.
    string buyerName = BuyerInfo == null ? null : BuyerInfo.Name;
    string buyerAddress = BuyerInfo == null ? null : BuyerInfo.Address;
    return string.Format("InvoiceNumber={0},Buyer Name={1}, Buyer Address = {2}", InvoiceNumber, buyerName, buyerAddress);
}
// Members of the Record type, which holds one product row while the input is parsed.
// NOTE(review): the class declaration, the `y` and `price` fields used by ToString, and
// several constructor, accessor and method bodies are not visible in this excerpt; the
// comments below describe only what is shown.
// Product-name chunks, in the order they were added through AddProductChunck.
protected Queue<string> product;
protected decimal quantity;
protected decimal totalPrice;
// NOTE(review): not read or written by any code visible in this excerpt.
protected bool arabicText;
// Creates the empty chunk queue. The constructor header this statement belongs to is not
// visible — presumably the single-argument Record(int y) used by the parsing methods.
this.product = new Queue<string>();
// Full constructor. Only the quantity/totalPrice assignments and the console trace are
// visible; whether y, product and price are stored as well cannot be confirmed from here.
public Record(int y, Queue<string> product, decimal quantity, decimal price, decimal totalPrice)
this.quantity = quantity;
this.totalPrice = totalPrice;
// NOTE(review): appears to be leftover debug output written on construction.
Console.WriteLine("Constructor created");
// Accessor bodies of the next two properties are not visible in this excerpt.
public Queue<string> Product
public decimal TotalPrice
// Builds the product name from the queued chunks with a StringBuilder. The visible code
// contains both a front-to-back foreach over the queue and a loop that walks a copied array
// from the last chunk to the first, appending each one.
// NOTE(review): the condition choosing between the two and the returned value are not visible.
public string ProductName
StringBuilder sb = new StringBuilder();
foreach (string p1 in product)
Array ar = product.ToArray();
for (int i = ar.Length - 1; i >= 0; i--)
sb.Append(ar.GetValue(i));
// Appends one chunk of the product name to the end of the queue.
public void AddProductChunck(string nameChunck)
this.product.Enqueue(nameChunck);
// Body not visible. The parsing methods look records up in a HashSet<Record>, which relies
// on GetHashCode and Equals — presumably both are based on y. TODO confirm.
public override int GetHashCode()
// Type-checks obj and delegates to the typed Equals overload.
public override bool Equals(object obj)
return obj is Record && Equals((Record)obj);
// Body not visible in this excerpt.
public bool Equals(Record r)
// Formats the row as "<y>-Product name=...,Unit Price=..., Quantity = ...,Total Price=...".
public override string ToString()
return string.Format("{4}-Product name={0},Unit Price={1}, Quantity = {2},Total Price={3}", ProductName, price, quantity, totalPrice, y);
// Properties of one invoice line (the enclosing type declaration is not visible in this
// excerpt; ProcessPrintXMLInv maps Record objects to Line and filters on Quantity).
// Presumably the top value of the row the line was read from — TODO confirm.
public int Y { get; set; }
public string ProductName { get; set; }
public decimal Quantity { get; set; }
public decimal UnitPrice { get; set; }
public decimal TotalPrice { get; set; }
/// <summary>
/// Formats the line as "&lt;Y&gt;-Product name=...,Unit Price=..., Quantity = ...,Total Price=...".
/// </summary>
/// <returns>The formatted line; numbers use the current culture's formatting.</returns>
public override string ToString()
{
    return string.Format("{4}-Product name={0},Unit Price={1}, Quantity = {2},Total Price={3}", ProductName, UnitPrice, Quantity, TotalPrice, Y);
}