Sniffing Game Market Data is throwing an Error python

126 views Asked by At

there is this game called albion online which is an mmorpg, I had an idea of sniffing the data of the market and analysing it in a small project of mine

FYI: the game allows sniffing of market data as they havenot gotten around to making an API and here is the proof : https://forum.albiononline.com/index.php/Thread/51604-Is-it-allowed-to-scan-your-internet-trafic-and-pick-up-logs/?pageNo=2

I found the following library on GitHub which does just that called AlbiPy and here is the link to the library: https://github.com/hrichharms/AlbiPy/blob/master/AlbiPy.py

import socket
import json
import threading
import platform
from datetime import datetime

PROBLEMS = ["'", "$", "QH", "?8", "H@", "ZP"]
HEADERS = ["Id", "UnitPriceSilver", "TotalPriceSilver", "Amount", "Tier", "IsFinished",
           "AuctionType", "HasBuyerFetched", "HasSellerFetched", "SellerCharacterId",
           "SellerName", "BuyerCharacterId", "BuyerName", "ItemTypeId", "ItemGroupTypeId",
           "EnchantmentLevel", "QualityLevel", "Expires", "ReferenceId"]


def local_ip():
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.connect(("8.8.8.8", 80))
    ip = s.getsockname()[0]
    s.close()
    return ip


class datapoint:
    """ Single market datapoint including all available data from the game's api"""

    def __init__(self, data):
        # data attribute
        self.data = data[:]
        # correct silver prices
        data[1] //= 10000
        data[2] //= 10000
        # convert expire date to datetime object
        data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
        # set attributes to data indexes
        self.Id = data[0]
        self.UnitPriceSilver = data[1]
        self.TotalPriceSilver = data[2]
        self.Amount = data[3]
        self.Tier = data[4]
        self.IsFinished = data[5]
        self.AuctionType = data[6]
        self.HasBuyerFetched = data[7]
        self.HasSellerFetched = data[8]
        self.SellerCharacterId = data[9]
        self.SellerName = data[10]
        self.BuyerCharacterId = data[11]
        self.BuyerName = data[12]
        self.ItemTypeId = data[13]
        self.ItemGroupTypeId = data[14]
        self.EnchantmentLevel = data[15]
        self.QualityLevel = data[16]
        self.Expires = data[17]
        self.ReferenceId = data[18]


class sniffer_data:
    """ Organized sniffed market data"""

    def __init__(self, logs, parsed, malformed):
        self.logs = logs[:]
        self.parsed = parsed[:]
        self.malformed = malformed[:]

    def __getitem__(self, i):
        return self.parsed[i]

    def __len__(self):
        return len(self.parsed)

    def __str__(self):
        parsed = [{HEADERS[j]: attribute for j, attribute in enumerate(i.data)} for i in self.parsed]
        return json.dumps({"logs": self.logs, "parsed": parsed, "malformed": self.malformed})


class sniffing_thread(threading.Thread):
    """ Sniffing thread class"""

    def __init__(self, problems=PROBLEMS):

        threading.Thread.__init__(self)

        # set problems list
        self.problems = problems

        # define thread attributes
        self.n = 0
        self.e = 0
        self.parsed = []
        self.malformed = []
        self.recording = False
        self.last_parsed = True
        # log list with placeholder entry
        self.logs = [""]

        # initialize socket object
        if platform.system() != "Windows":
            self.sniffer = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_UDP)

        # socket setup for windows environment
        if platform.system() == "Windows":
            self.sniffer = socket.socket(socket.AF_INET, socket.SOCK_RAW)
            self.sniffer.bind((local_ip(), 0))
            self.sniffer.ioctl(socket.SIO_RCVALL, socket.RCVALL_ON)


    def run(self):

        # set recording to True
        self.recording = True

        # while the thread is set to recording, sniff and record data
        while self.recording:

            # wait for market data
            try:
                data = self.sniffer.recvfrom(1350)[0]
            except OSError:
                pass

            # remove known problematic strings from data
            data = str(data)
            for p in self.problems:
                data = data.replace(p, "")

            # partition received cleaned data into chunks
            chunks = [s[3:] for s in data.split("\\") if len(s) > 5 and ("Silver" in s or "ReferenceId" in s)]

            # processed chunks
            for chunk in chunks:
                # if this chunk is the start of a new piece of market information, add a new entry to the log
                if "{" in chunk[:4]:
                    self.logs.append(chunk[chunk.find("{"):])
                # otherwise, this chunk is assumed to be a continuation of the last chunk and is simply concatenated to the end
                elif self.logs:
                    self.logs[-1] += chunk
            
            # set last parsed to false
            self.last_parsed = False

        if not self.last_parsed:
            self.parse_data()


    def parse_data(self):
        """ Parse the data currently collected by the thread"""
        self.parsed = []
        self.malformed = []
        if not self.logs[0]:
            self.logs.pop(0)
        for i, log in enumerate(self.logs):
            try:
                self.parsed.append(datapoint(list(json.loads(log).values())))
            except json.decoder.JSONDecodeError:
                self.malformed.append(self.logs[i])
        self.last_parsed = True


    def get_data(self):
        """ Get the latest data from sniffing thread"""
        # if no logs have been recorded
        if self.logs == [""]:
            return sniffer_data([], [], [])

        # parse logs, record malformed logs, and count total logs and malformed logs
        if not self.last_parsed:
            self.parse_data()
        
        # return parsed data
        return sniffer_data(self.logs, self.parsed, self.malformed)


    def stop(self):
        """ Stop the sniffing thread"""
        self.recording = False

the problem is when I try to run it I get the following Error

Traceback (most recent call last):
  File "c:\Users\pc\Desktop\code\program 3.0\tests2.py", line 125, in <module>
    orders = thread.get_data()
             ^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 165, in get_data
    self.parse_data()
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 151, in parse_data
    self.parsed.append(datapoint(list(json.loads(log).values())))
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 32, in __init__
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\pc\AppData\Local\Programs\Python\Python311\Lib\_strptime.py", line 568, in _strptime_datetime
    tt, fraction, gmtoff_fraction = _strptime(data_string, format)
                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\pc\AppData\Local\Programs\Python\Python311\Lib\_strptime.py", line 349, in _strptime
    raise ValueError("time data %r does not match format %r" %
ValueError: time data '2023-1H,2-27T15:' does not match format '%Y-%m-%dT%H:%M'
Exception in thread Thread-1:
Traceback (most recent call last):
  File "C:\Users\pc\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 140, in run
    self.parse_data()
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 151, in parse_data
    self.parsed.append(datapoint(list(json.loads(log).values())))
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 32, in __init__
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\pc\AppData\Local\Programs\Python\Python311\Lib\_strptime.py", line 568, in _strptime_datetime
    tt, fraction, gmtoff_fraction = _strptime(data_string, format)
                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\pc\AppData\Local\Programs\Python\Python311\Lib\_strptime.py", line 349, in _strptime
    raise ValueError("time data %r does not match format %r" %
ValueError: time data '2023-1H,2-27T15:' does not match format '%Y-%m-%dT%H:%M'

from my basic understanding either get a valid string or rearrange the strptime to match the format that is provided

the problem there is that the format provided by this sniffing library keeps changing some times its 2023-1H,2-27T15 other times its 2023-28-11?t03-15 so from my basic understanding this might be a problem with how this sniffing library is processing the data

my understanding of networking and sniffing is very limited and next to none, if someone has any ideas how i can run this library without it throwing an error please help me

update:

i have done the fix that was suggested with the try and except for ValueError and TypeError

and it worked for a little bit, then another error came up

Traceback (most recent call last):
Exception in thread Thread-1:
  File "c:\Users\pc\Desktop\code\program 3.0\tests2.py", line 119, in <module>
Traceback (most recent call last):
  File "C:\Users\pc\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    input_prices()
  File "c:\Users\pc\Desktop\code\program 3.0\tests2.py", line 112, in input_prices
    self.run()
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 148, in run
    orders = thread.get_data()
             ^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 173, in get_data
    self.parse_data()
    self.parse_data()
    self.parsed.append(datapoint(list(json.loads(log).values())))
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 159, in parse_data
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 35, in __init__
    self.parsed.append(datapoint(list(json.loads(log).values())))
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                                 ~~~~^^^^
IndexError: list index out of range
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 35, in __init__
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
                                 ~~~~^^^^
IndexError: list index out of range

i have tried adding another except statment that checks if IndexError like so

except IndexError:
    ssasd = datetime(1900, 1, 1, 0, 0)
    data.insert(17, ssasd)

but its not working, if anyone has a solution please provide one

3

There are 3 answers

3
furas On BEST ANSWER

You may have to edit module and use try/except to catch error and put some fake date

try:
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
except ValueError:
    data[17] = datetime.datetime(1900, 1, 1, 0, 0)  # fake value `1900-01-01 00:00`

If you know other patterns which can match then you can use for-loop to test them - like this:

result = datetime.datetime(1900, 1, 1, 0, 0)  # some default value 

for pattern in ["%Y-%m-%dT%H:%M", "%Y-%m-%d?%H:%M"]:
    try:
        result = datetime.strptime(data[17][0:16], pattern)
        break   # exit if it match pattern
    except ValueError:
        print('Error for:', pattern)
    
data[17] = result    

If it will help then you may send problem and code to author of module - to Issues on GitHub.


EDIT:

It seems this problem is already in Issues on GitHub since Feb 14, 2022

Errors when parsing data · Issue #5 · hrichharms/AlbiPy

7
banom On

so this solution involves the editing of line 32 of the script

data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")

just as said by the lovely gentleman furas i have to implement a try and except statment and here is what worked in fixing the problem

try:
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
except ValueError:
    data[17] = datetime(1900, 1, 1, 0, 0)
except TypeError:
    data[17] = datetime(1900, 1, 1, 0, 0)

I added a TypeError too bec some times it shows this error

Traceback (most recent call last):
Exception in thread Thread-1:
  File "c:\Users\pc\Desktop\code\program 3.0\tests2.py", line 125, in <module>
Traceback (most recent call last):
  File "C:\Users\pc\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    orders = thread.get_data()
    self.run()
             ^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 148, in run
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 173, in get_data
    self.parse_data()
    self.parse_data()
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 159, in parse_data
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 159, in parse_data
    self.parsed.append(datapoint(list(json.loads(log).values())))
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    self.parsed.append(datapoint(list(json.loads(log).values())))
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 35, in __init__
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\code\program 3.0\AlbiPy.py", line 35, in __init__
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
                                 ~~~~~~~~^^^^^^
    data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
TypeError: 'int' object is not subscriptable
                                 ~~~~~~~~^^^^^^
TypeError: 'int' object is not subscriptable

so i didnot understand a thing from that error msg other than a TypeError so i just slapped an Except statment for TypeError

0
banom On

so after a bit of time i think i figured out what is the problem (I think)

the way this scrapper reads the packets, sometimes (not always)it gets packets it cannot read, so it doesnot add it to the data list, so it puts everything out of order (I think) how I did a work around is pretty much the equivalence to slaping a bandit on it and call it aday

here is my solution

class datapoint:
    """ Single market datapoint including all available data from the game's api"""

    def __init__(self, data):
        # data attribute
        self.data = data[:]
        # correct silver prices

        data[1] //= 10000
        data[2] //= 10000
        # convert expire date to datetime obje
        try:
            data[17] = datetime.strptime(data[17][0:16], "%Y-%m-%dT%H:%M")
        except ValueError:
            data[17] = datetime(1900, 1, 1, 0, 0)
        except TypeError:
            data[17] = datetime(1900, 1, 1, 0, 0)
        except IndexError:
            ssasd = datetime(1900, 1, 1, 0, 0)
            data.insert(17, ssasd)

            
        # set attributes to data indexes
        try:
            self.Id = data[0]
        except IndexError:
            self.Id = "NULL"

        try:
            self.UnitPriceSilver = data[1]
        except IndexError:
            self.UnitPriceSilver = 0

        try:
            self.TotalPriceSilver = data[2]
        except IndexError:
            self.TotalPriceSilver = 0

        try:
            self.Amount = data[3]
        except IndexError:
            self.Amount = 0

        try:
            self.Tier = data[4]
        except IndexError:
            self.Tier = 0

        try:
            self.IsFinished = data[5]
        except IndexError:
            self.IsFinished = "false"

        try:
            self.AuctionType = data[6]
        except IndexError:
            self.AuctionType = "offer"

        try:
            self.HasBuyerFetched = data[7]
        except IndexError:
            self.HasBuyerFetched = "false"

        try:
            self.HasSellerFetched = data[8]
        except IndexError:
            self.HasSellerFetched = "false"

        try:
            self.SellerCharacterId = data[9]
        except IndexError:
            self.SellerCharacterId = "c00fd0f0-0ca0-00b0-aeaa-a00ee0f00000"

        try:
            self.SellerName = data[10]
        except IndexError:
            self.SellerName = "NULL"

        try:
            self.BuyerCharacterId = data[11]
        except IndexError:
            self.BuyerCharacterId = "a00aa0a0-0aa0-00a0-aaaa-a00aa0a00000"

        try:
            self.BuyerName = data[12]
        except IndexError:
            self.BuyerName = "NULL"

        try:
            self.ItemTypeId = data[13]
        except IndexError:
            self.ItemTypeId = "NULL"

        try:
            self.ItemGroupTypeId = data[14]
        except IndexError:
            self.ItemGroupTypeId = "NULL"

        try:
            self.EnchantmentLevel = data[15]
        except IndexError:
            self.EnchantmentLevel = 0

        try:
            self.QualityLevel = data[16]
        except IndexError:
            self.QualityLevel = 0

        try:
            self.Expires = data[17]
        except IndexError:
            self.Expires = datetime(1900, 1, 1, 0, 0)

        try:
            self.ReferenceId = data[18]
        except IndexError:
            self.ReferenceId = 'aa0000aa-00aa-00a0-aaa0-0a0a00aa00aa'

pretty much each "record" is 18 points from prices to dates of orders when one of them is not read it puts everything else out of ordor and gives index error bec now there is 1 less item in the index than intended, i pretty much just filled all 18 spaces so i can move on with my day and not get an index error as am not gona use the specific list that is corrupted if that makes sense and i will proccess the data later on after i extracted it from the sniffer

speicial thanks to furas for helping me , you the real mvp

hope I made any sense