GOV/Webservice/Python-suds

Importing GOV types into SQLite

Create Table f_gov_type

-- Create the object-type table in SQLite
DROP TABLE IF EXISTS f_gov_type;

CREATE TABLE f_gov_type (
type_id       integer,
type_desc_1   text,
type_desc_2   text
);

Import into f_gov_type

This imports all object-type texts into a SQLite table.

# Database file (full path)
sql_db = r'C:\temp\Demo\data\gov_demo.db'

from suds.client import Client
simple_service_url = 'http://gov.genealogy.net/services/SimpleService?wsdl'
read_client = Client(simple_service_url)

# Access SQLite
import sqlite3
# https://docs.python.org/3/library/sqlite3.html

# Open the SQLite database in autocommit mode
conn = sqlite3.connect(sql_db, isolation_level=None)

# Empty the table
c = conn.cursor()
c.execute('DELETE FROM f_gov_type')
# Probe type numbers up to an estimated upper bound and count the hits
j = 0
for i in range(1, 290):
    try:
        gov_eintrag = read_client.service.getTypeDescription(i, 'deu')
        j = j + 1
        desc_1, desc_2 = gov_eintrag[0]
        if desc_2 is None:
            desc_2 = ''
        ins_sql = 'INSERT INTO f_gov_type(type_id, type_desc_1, type_desc_2) ' + \
                  'VALUES (' + str(i) + \
                  ",'" + desc_1 + \
                  "','" + desc_2 + "')"
        # print('SQL:' + ins_sql)
        c.execute(ins_sql)
    # skip type numbers that do not exist
    except:
        continue
print('Number of object types: ' + str(j))
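
The imported texts can then be checked directly in SQLite; a small sketch that combines both description columns in the same way the views further below do:

-- Quick check of the imported object-type texts
select type_id,
       case when type_desc_2 = '' then type_desc_1
            else type_desc_1 || ' (' || type_desc_2 || ')' end as typ
from f_gov_type
order by type_id;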

Importing GOV entries by coordinates into SQLite as JSON

Create Table

DROP TABLE IF EXISTS f_gov_data;

CREATE TABLE f_gov_data (
lfd_id        integer primary key autoincrement, -- surrogate key of the table
gov_id        text, -- GOV identifier
item_data     text, -- GOV entry as JSON
item_childs   text -- list of child objects as JSON
);

Import by coordinates

# Convert Julian day numbers to Gregorian dates
import juliandate

from suds.client import Client
simple_service_url = 'http://gov.genealogy.net/services/SimpleService?wsdl'
read_client = Client(simple_service_url)

# The data is stored as JSON
import json

# Access SQLite
import sqlite3
# https://docs.python.org/3/library/sqlite3.html

# Serialize a suds result object recursively into a dictionary
from suds.sudsobject import asdict

def recursive_asdict(d):
    out = {}
    for k, v in asdict(d).items():
        # convert Julian day number to an ISO date string
        if k == '_jd':
            dt = juliandate.to_gregorian(v)
            vc = str(10000 * dt[0] + 100 * dt[1] + dt[2])
            v = vc[0:4] + '-' + vc[4:6] + '-' + vc[6:8]
        if k == '_last-modification':
            v = str(v)
        if hasattr(v, '__keylist__'):
            out[k] = recursive_asdict(v)
        elif isinstance(v, list):
            out[k] = []
            for item in v:
                if hasattr(item, '__keylist__'):
                    out[k].append(recursive_asdict(item))
                else:
                    out[k].append(item)
        else:
            out[k] = v
    return out

sql_db = r'C:\temp\Demo\data\gov_demo.db'
conn = sqlite3.connect(sql_db, isolation_level=None)
c = conn.cursor()

# Since only the first 500 hits are returned per call, the query area must be kept small enough.
# A grid of 0.1 decimal degrees works well (a sketch for tiling larger areas follows after this block).
latitude0 = 50.65
latitude1 = 50.75
longitude0 = 5.80
longitude1 = 5.90

# List of GOV ids inside the bounding box (at most 500)
item_list = read_client.service.searchByBoundingBox(latitude0, latitude1, longitude0, longitude1)

for i, gov_item in enumerate (item_list.item):
    gov_eintrag = read_client.service.getObject(gov_item)
    gov_eintrag_dict = recursive_asdict(gov_eintrag)
    gov_eintrag_json = json.dumps (gov_eintrag_dict, sort_keys=True, default=str, ensure_ascii=False)
    gov_eintrag_txt = gov_eintrag_json.replace("'", "''")

    gov_eintrag_child = read_client.service.getChildObjects(gov_item)
    gov_eintrag_child_dict = recursive_asdict(gov_eintrag_child)
    gov_eintrag_child_json = json.dumps(gov_eintrag_child_dict, sort_keys=True, default=str, ensure_ascii=False)
    gov_eintrag_child_txt = gov_eintrag_child_json.replace("'", "''")
    # print ('GOV-ID: ' + gov_item )

    sql_ins = 'insert into f_gov_data (gov_id,  item_data, item_childs) values (' + \
               "'" + gov_item + "','" + gov_eintrag_txt + "','" +  gov_eintrag_child_txt + "')"
    rc = c.execute(sql_ins)
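
Since searchByBoundingBox returns at most 500 hits per call, a larger region can be covered by stepping a 0.1-degree grid across it. A minimal sketch, reusing the read_client from above; the grid bounds are placeholder values, and the inner loop body would fetch and insert the entries exactly as in the loop above:

# Sketch: tile a larger region into 0.1-degree boxes so that each
# searchByBoundingBox call stays below the 500-hit limit.
step = 0.1
lat0, lat1 = 50.0, 51.0   # placeholder bounds
lon0, lon1 = 5.0, 7.0     # placeholder bounds
lat = lat0
while lat < lat1:
    lon = lon0
    while lon < lon1:
        item_list = read_client.service.searchByBoundingBox(lat, lat + step, lon, lon + step)
        # an empty tile may not carry an 'item' attribute on the suds result
        for gov_item in getattr(item_list, 'item', []):
            pass  # call getObject/getChildObjects and insert as in the loop above
        lon = lon + step
    lat = lat + step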

JSON views

Querying arrays

All arrays can be queried with a SQL SELECT. Here is an example.

drop view if exists v_gov_part_of_govid_1_3;

create view v_gov_part_of_govid_1_3 as 
select gov_id, item_data,
       breite, laenge, hoehe,
       plz_1, plz_1_ab, plz_1_bis,
       plz_2, plz_2_ab, plz_2_bis,
       name_1, sprache_1, name_2, sprache_2, 
       part_of_govid_1, part_of_govid_2, part_of_govid_3, text,
       case when (type_desc_2 = '') then f_gov_type.type_desc_1 else type_desc_1 || ' (' || type_desc_2 || ')' end as typ
from
(
  select  gov_id, item_data,
          json_extract(item_data,'$.position._lat') as breite,
          json_extract(item_data,'$.position._lon') as laenge,
          json_extract(item_data,'$.position._height') as hoehe,
--
          json_extract(item_data,'$.postal-code[0]._value') as plz_1,
          json_extract(item_data,'$.postal-code[0].timespan.begin._jd') as plz_1_ab,
          json_extract(item_data,'$.postal-code[0].timespan.end._jd') as plz_1_bis,
--
          json_extract(item_data,'$.postal-code[1]._value') as plz_2,
          json_extract(item_data,'$.postal-code[1].timespan.begin._jd') as plz_2_ab,
          json_extract(item_data,'$.postal-code[1].timespan.end._jd') as plz_2_bis,
--
          json_extract(item_data,'$.name[0]._value') as name_1,
          json_extract(item_data,'$.name[0]._lang') as sprache_1,
          json_extract(item_data,'$.name[1]._value') as name_2,
          json_extract(item_data,'$.name[1]._lang') as sprache_2,
--
          json_extract(item_data,'$.part-of[0]._ref') AS part_of_govid_1,
          json_extract(item_data,'$.part-of[1]._ref') as part_of_govid_2,
          json_extract(item_data,'$.part-of[2]._ref') as part_of_govid_3,
          json_extract(item_data,'$.note[0].text') as text,
          json_extract(item_data,'$.type[0]._value') as t_1
  from f_gov_data
) a
left join f_gov_type
on a.t_1 = f_gov_type.type_id
;
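
The view can then be queried like an ordinary table. A small sketch with a placeholder GOV id:

-- All entries recorded as directly part of a given object (placeholder GOV id)
select gov_id, name_1, typ, plz_1, breite, laenge
from v_gov_part_of_govid_1_3
where part_of_govid_1 = 'object_123456'
   or part_of_govid_2 = 'object_123456'
   or part_of_govid_3 = 'object_123456'
order by name_1;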

Querying external identifiers

Duplicates can be identified this way.

drop view if exists v_gov_external_ref_1_6;

create view v_gov_external_ref_1_6 as 
select  gov_id, 
        json_extract(item_data,'$.external-reference[0]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[0]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[1]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[1]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[2]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[2]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[3]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[3]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[4]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[4]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[5]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[5]') IS NOT NULL 
order by gov_id, extref
;
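
A follow-up query on this view lists external identifiers that occur for more than one GOV entry; a sketch using standard SQLite aggregate functions:

-- External identifiers pointing to more than one GOV entry
select extref,
       count(*)                   as anzahl,
       group_concat(gov_id, ', ') as gov_ids
from v_gov_external_ref_1_6
group by extref
having count(*) > 1
order by anzahl desc, extref;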

Web links

* https://www.sqlite.org/lang_corefunc.html
* https://www.sqlite.org/json1.html
* https://pypi.org/project/suds/
* https://stackoverflow.com/questions/2412486/serializing-a-suds-object-in-python